Resume comparison

Python for resume comparison

Python script that compares 4 resumes across 6 specific categories:

1. Visualization

2. Data Engineering

3. Python

4. Power BI

5. Storytelling

6. Experience

import re

import pandas as pd

from pathlib import Path

# ========== CONFIGURATION ==========

# Define categories and related keywords (you can adjust these)

# Maps each category name to the lowercase keywords/phrases counted for it.
# The category names double as column names in the output table.
# NOTE: "power bi" is listed under both "Visualization" and "Power BI", so a
# single mention contributes to both categories.
CATEGORY_KEYWORDS = {
    "Visualization": [
        "visualization", "dashboard", "tableau", "power bi",
        # Both spellings: the one-word form alone never matches "Looker Studio".
        "lookerstudio", "looker studio",
        "data visualization", "charts", "plots", "infographic",
    ],
    "Data Engineer": [
        "data pipeline", "etl", "data warehouse", "data lake", "airflow",
        "snowflake", "databricks", "spark", "dbt", "bigquery", "redshift",
    ],
    "Python": [
        "python", "pandas", "numpy", "scikit", "tensorflow", "matplotlib",
        "flask", "django",
    ],
    "Power BI": [
        "power bi", "dax",
        # Both spellings: the one-word form alone never matches "Power Query".
        "powerquery", "power query",
        "m language", "data model", "report builder",
    ],
    "Storytelling": [
        "storytelling", "data storytelling", "insight", "narrative",
        "business story", "data-driven decision", "presentation",
    ],
    "Experience": [
        "years", "experience", "lead", "managed", "delivered", "designed",
        "implemented", "developed", "strategic", "team", "project",
    ],
}

# ========== HELPER FUNCTIONS ==========

def normalize_text(text):
    """Return *text* lowercased, stripped of punctuation, with whitespace collapsed.

    Every character other than ``a-z``, ``0-9``, whitespace, ``+``, ``-`` and
    ``.`` is replaced by a space (so "C++", "data-driven" and "15+" survive
    intact), then runs of whitespace are squeezed to a single space and the
    ends are trimmed.

    Args:
        text: Raw text to normalize.

    Returns:
        The normalized string.
    """
    lowered = text.lower()
    # Replace rather than delete so that "Python,Tableau" does not fuse into
    # a single token.
    cleaned = re.sub(r'[^a-z0-9\s+.\-]', ' ', lowered)
    return re.sub(r'\s+', ' ', cleaned).strip()

def score_resume(text, categories):
    """Count keyword occurrences in a resume for each category.

    The text is first passed through ``normalize_text``; keywords are then
    matched as whole words/phrases (``\\b`` on both sides). Within one
    category each stretch of text is counted at most once, so a phrase such
    as "data visualization" does not also score for the shorter keyword
    "visualization" it contains.

    Args:
        text: Raw resume text.
        categories: Mapping of category name to an iterable of keyword
            strings, written the way they appear after normalization
            (lowercase).

    Returns:
        A dict with one integer count per category name plus a ``"Total"``
        key holding the sum of all category counts.
    """
    text = normalize_text(text)
    scores = {}
    for cat, keywords in categories.items():
        # Longest first: regex alternation takes the first alternative that
        # matches at a position, so longer phrases win over their prefixes.
        # Empty strings are dropped because they would match at every word
        # boundary.
        ordered = sorted({k for k in keywords if k}, key=len, reverse=True)
        if not ordered:
            scores[cat] = 0
            continue
        pattern = re.compile(
            r'\b(?:' + '|'.join(re.escape(k) for k in ordered) + r')\b'
        )
        scores[cat] = len(pattern.findall(text))
    scores["Total"] = sum(scores.values())
    return scores

# ========== SAMPLE INPUT (Replace with your resume text or file reads) ==========

# Placeholder resume snippets keyed by display name; swap in real resume text
# (or text read from files) before running a real comparison.
resume_texts = {
    "Resume A": (
        "Head of Data with 15+ years... Skilled in Power BI, Python, "
        "Tableau, and data storytelling..."
    ),
    "Resume B": (
        "Senior Data Engineer experienced with Airflow, dbt, Databricks, "
        "and Snowflake..."
    ),
    "Resume C": (
        "Analytics professional focusing on visualization using Power BI "
        "and Looker Studio..."
    ),
    "Resume D": (
        "Python developer and data storyteller with hands-on experience "
        "in ETL, data pipelines, and BI tools..."
    ),
}

# ========== COMPUTE SCORES ==========

# One row per resume: its name followed by the per-category counts and "Total".
results = [
    {"Resume": name, **score_resume(text, CATEGORY_KEYWORDS)}
    for name, text in resume_texts.items()
]

# Naming the columns explicitly keeps the frame well-formed (and the lookups
# below valid) even when resume_texts is empty.
df = pd.DataFrame(results, columns=["Resume", *CATEGORY_KEYWORDS, "Total"])

# Normalize scores (relative to max per category)
for cat in CATEGORY_KEYWORDS:
    best = df[cat].max()
    if best > 0:
        df[f"{cat} Score (%)"] = (df[cat] / best * 100).round(1)
    else:
        # No resume matched this category (or there are no resumes);
        # dividing by the zero maximum would fill the column with NaN.
        df[f"{cat} Score (%)"] = 0.0

# Sort by total score
df = df.sort_values("Total", ascending=False).reset_index(drop=True)

# ========== OUTPUT ==========

print("\n===== Resume Comparison Summary =====\n")

try:
    print(df.to_markdown(index=False))
except ImportError:
    # to_markdown needs the optional "tabulate" package; fall back to the
    # plain-text rendering rather than aborting before the files are saved.
    print(df.to_string(index=False))

# Save to HTML and CSV
output_path_html = Path("resume_comparison_4resumes.html")
output_path_csv = Path("resume_comparison_4resumes.csv")

df.to_html(output_path_html, index=False)
df.to_csv(output_path_csv, index=False)

print(f"\nReport saved as:\n - {output_path_html.resolve()}\n - {output_path_csv.resolve()}")

Sample Report

Page updated

Google Sites

Report abuse