resume data extraction
This commit is contained in:
parent
1aa8b6800a
commit
ede6b2760b
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -45,7 +45,7 @@ INSTALLED_APPS = [
|
||||
'django.contrib.messages',
|
||||
'django.contrib.staticfiles',
|
||||
'rest_framework',
|
||||
'recruitment',
|
||||
'recruitment.apps.RecruitmentConfig',
|
||||
'corsheaders',
|
||||
'django.contrib.sites',
|
||||
'allauth',
|
||||
|
||||
BIN
db.sqlite3
BIN
db.sqlite3
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
recruitment/__pycache__/linkedin_service.cpython-312.pyc
Normal file
BIN
recruitment/__pycache__/linkedin_service.cpython-312.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
recruitment/__pycache__/signals.cpython-312.pyc
Normal file
BIN
recruitment/__pycache__/signals.cpython-312.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
recruitment/__pycache__/validators.cpython-312.pyc
Normal file
BIN
recruitment/__pycache__/validators.cpython-312.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,6 +1,6 @@
|
||||
from django.contrib import messages
|
||||
from . import models
|
||||
from .utils import extract_summary_from_pdf
|
||||
# from .utils import extract_summary_from_pdf
|
||||
|
||||
from django.contrib import admin
|
||||
from django.contrib.auth.admin import UserAdmin as BaseUserAdmin
|
||||
@ -27,20 +27,25 @@ class GroupAdmin(BaseGroupAdmin, ModelAdmin):
|
||||
pass
|
||||
|
||||
|
||||
@admin.register(models.JobPosting)
class JobPostingAdmin(ModelAdmin):
    """Admin configuration for the ``JobPosting`` model."""

    # Columns shown in the admin changelist.
    list_display = (
        'title',
        'description',
        'qualifications',
    )
|
||||
|
||||
|
||||
@admin.register(models.Job)
class JobAdmin(ModelAdmin):
    """Admin configuration for the ``Job`` model."""

    # Columns shown in the admin changelist.
    list_display = (
        'title',
        'is_published',
        'posted_to_linkedin',
        'created_at',
    )
    # Sidebar filters for the publication flags.
    list_filter = (
        'is_published',
        'posted_to_linkedin',
    )
    # Fields matched by the admin search box (title plus both description
    # fields).
    search_fields = (
        'title',
        'description_en',
        'description_ar',
    )
|
||||
|
||||
@admin.action(description="Parse selected resumes")
def parse_resumes(modeladmin, request, queryset):
    """Admin action: parse the resume of every selected candidate.

    For each candidate that has a resume, ``extract_summary_from_pdf`` is
    called on the file's path and the stringified result is stored in
    ``parsed_summary``. Candidates without a resume are skipped, and results
    that carry an ``'error'`` key are not stored.

    Args:
        modeladmin: The ``ModelAdmin`` the action was invoked from (unused).
        request: The current request; used to attach user-facing messages.
        queryset: The candidates selected in the changelist.
    """
    parsed_count = 0
    failed_count = 0

    for candidate in queryset:
        if not candidate.resume:
            continue

        summary = extract_summary_from_pdf(candidate.resume.path)

        # The extractor signals failure with a dict carrying an 'error' key
        # (e.g. when the file is missing); do not store that as a summary.
        if isinstance(summary, dict) and 'error' in summary:
            failed_count += 1
            continue

        candidate.parsed_summary = str(summary)
        candidate.save()
        parsed_count += 1

    # Report what was actually parsed rather than the size of the selection.
    messages.success(request, f"Parsed {parsed_count} resumes successfully.")
    if failed_count:
        messages.warning(request, f"Failed to parse {failed_count} resumes.")
|
||||
# @admin.action(description="Parse selected resumes")
|
||||
# def parse_resumes(modeladmin, request, queryset):
|
||||
# for candidate in queryset:
|
||||
# if candidate.resume:
|
||||
# summary = extract_summary_from_pdf(candidate.resume.path)
|
||||
# candidate.parsed_summary = str(summary)
|
||||
# candidate.save()
|
||||
# messages.success(request, f"Parsed {queryset.count()} resumes successfully.")
|
||||
|
||||
@admin.register(models.Candidate)
|
||||
class CandidateAdmin(ModelAdmin):
|
||||
@ -48,7 +53,7 @@ class CandidateAdmin(ModelAdmin):
|
||||
list_filter = ('applied', 'job')
|
||||
search_fields = ('name', 'email')
|
||||
# readonly_fields = ('parsed_summary',)
|
||||
actions = [parse_resumes]
|
||||
|
||||
|
||||
@admin.register(models.TrainingMaterial)
|
||||
class TrainingMaterialAdmin(ModelAdmin):
|
||||
|
||||
@ -4,3 +4,5 @@ from django.apps import AppConfig
|
||||
class RecruitmentConfig(AppConfig):
    """Application configuration for the ``recruitment`` app."""

    name = 'recruitment'
    default_auto_field = 'django.db.models.BigAutoField'

    def ready(self):
        """Import the app's signals module once the app is ready."""
        # Imported purely for its import-time side effects; the name itself
        # is intentionally unused.
        import recruitment.signals  # noqa: F401
|
||||
|
||||
BIN
recruitment/management/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
recruitment/management/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
@ -0,0 +1,33 @@
|
||||
# Generated by Django 5.2.7 on 2025-10-05 13:12
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add the resume-scoring columns to the ``candidate`` model."""

    # Must run after the migration that introduced the form models.
    dependencies = [
        ('recruitment', '0012_form_formsubmission_uploadedfile'),
    ]

    operations = [
        # Per-criterion results; defaults to an empty dict.
        migrations.AddField(
            model_name='candidate',
            name='criteria_checklist',
            field=models.JSONField(blank=True, default=dict),
        ),
        # Nullable integer score.
        migrations.AddField(
            model_name='candidate',
            name='match_score',
            field=models.IntegerField(blank=True, null=True),
        ),
        # Free-text strengths.
        migrations.AddField(
            model_name='candidate',
            name='strengths',
            field=models.TextField(blank=True),
        ),
        # Free-text weaknesses.
        migrations.AddField(
            model_name='candidate',
            name='weaknesses',
            field=models.TextField(blank=True),
        ),
    ]
|
||||
31
recruitment/migrations/0014_source_jobposting_source.py
Normal file
31
recruitment/migrations/0014_source_jobposting_source.py
Normal file
@ -0,0 +1,31 @@
|
||||
# Generated by Django 5.2.7 on 2025-10-05 16:11
|
||||
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Create the ``Source`` model and link ``JobPosting`` to it."""

    # Must run after the migration that added the candidate scoring fields.
    dependencies = [
        ('recruitment', '0013_candidate_criteria_checklist_candidate_match_score_and_more'),
    ]

    operations = [
        # New table holding the source a job posting came from.
        migrations.CreateModel(
            name='Source',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(choices=[('ATS', 'Applicant Tracking System'), ('ERP', 'ERP system')], max_length=100, verbose_name='Source Type')),
                ('created_at', models.DateTimeField(auto_now_add=True)),
            ],
            options={
                'verbose_name': 'Source',
                'verbose_name_plural': 'Sources',
            },
        ),
        # Optional link from a job posting to its source; deleting the source
        # sets the column to NULL instead of deleting the posting.
        migrations.AddField(
            model_name='jobposting',
            name='source',
            field=models.ForeignKey(blank=True, help_text='The system or channel from which this job posting originated or was first published.', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='job_postings', to='recruitment.source'),
        ),
    ]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -103,6 +103,15 @@ class JobPosting(Base):
|
||||
start_date = models.DateField(null=True, blank=True, help_text="Desired start date")
|
||||
open_positions = models.PositiveIntegerField(default=1, help_text="Number of open positions for this job")
|
||||
|
||||
source = models.ForeignKey(
|
||||
'Source',
|
||||
on_delete=models.SET_NULL, # Recommended: If a source is deleted, job's source is set to NULL
|
||||
related_name='job_postings',
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text="The system or channel from which this job posting originated or was first published."
|
||||
)
|
||||
|
||||
class Meta:
|
||||
ordering = ['-created_at']
|
||||
verbose_name = "Job Posting"
|
||||
@ -114,7 +123,7 @@ class JobPosting(Base):
|
||||
def save(self, *args, **kwargs):
|
||||
# Generate unique internal job ID if not exists
|
||||
if not self.internal_job_id:
|
||||
prefix = "UNIV"
|
||||
prefix = "KAAUH"
|
||||
year = timezone.now().year
|
||||
# Get next sequential number
|
||||
last_job = JobPosting.objects.filter(
|
||||
@ -188,6 +197,12 @@ class Candidate(Base):
|
||||
offer_status = models.CharField(choices=Status.choices,max_length=100, null=True, blank=True, verbose_name=_('Offer Status'))
|
||||
join_date = models.DateField(null=True, blank=True, verbose_name=_('Join Date'))
|
||||
|
||||
# Scoring fields (populated by signal)
|
||||
match_score = models.IntegerField(null=True, blank=True)
|
||||
strengths = models.TextField(blank=True)
|
||||
weaknesses = models.TextField(blank=True)
|
||||
criteria_checklist = models.JSONField(default=dict, blank=True)
|
||||
|
||||
class Meta:
|
||||
verbose_name = _('Candidate')
|
||||
verbose_name_plural = _('Candidates')
|
||||
@ -297,6 +312,8 @@ class Form(models.Model):
|
||||
def __str__(self):
|
||||
return self.title
|
||||
|
||||
|
||||
|
||||
class FormSubmission(models.Model):
|
||||
form = models.ForeignKey(Form, on_delete=models.CASCADE, related_name='submissions')
|
||||
submission_data = models.JSONField(default=dict) # Stores form responses
|
||||
@ -312,4 +329,32 @@ class UploadedFile(models.Model):
|
||||
field_id = models.CharField(max_length=100)
|
||||
file = models.FileField(upload_to='form_uploads/%Y/%m/%d/')
|
||||
original_filename = models.CharField(max_length=255)
|
||||
uploaded_at = models.DateTimeField(auto_now_add=True)
|
||||
uploaded_at = models.DateTimeField(auto_now_add=True)
|
||||
|
||||
|
||||
|
||||
|
||||
class Source(models.Model):
    """A source that a job posting can be linked to.

    ``name`` is restricted to the values declared in ``SourceType``; the
    string form of an instance is the human-readable label of that value.
    """

    class SourceType(models.TextChoices):
        """Allowed values for ``Source.name``."""

        ATS = 'ATS', _('Applicant Tracking System')
        # NOTE(review): the member is named CRM but its stored value and label
        # both say ERP. Left untouched because renaming the member would
        # change the class's interface; confirm which one is intended.
        CRM = 'ERP', _('ERP system')

    name = models.CharField(
        verbose_name=_('Source Type'),
        choices=SourceType.choices,
        max_length=100,
    )
    # Set once, when the row is first created.
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        verbose_name = _('Source')
        verbose_name_plural = _('Sources')

    def __str__(self):
        """Return the display label of the selected source type."""
        label = self.get_name_display()
        return f"{label}"
|
||||
|
||||
|
||||
|
||||
|
||||
71
recruitment/signals.py
Normal file
71
recruitment/signals.py
Normal file
@ -0,0 +1,71 @@
|
||||
from django.db.models.signals import post_save
|
||||
from django.dispatch import receiver
|
||||
from . import models
|
||||
|
||||
# @receiver(post_save, sender=models.Candidate)
|
||||
# def parse_resume(sender, instance, created, **kwargs):
|
||||
# if instance.resume and not instance.summary:
|
||||
# from .utils import extract_summary_from_pdf,match_resume_with_job_description
|
||||
# summary = extract_summary_from_pdf(instance.resume.path)
|
||||
# if 'error' not in summary:
|
||||
# instance.summary = summary
|
||||
# instance.save()
|
||||
|
||||
# match_resume_with_job_description
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
import os
|
||||
from .utils import extract_text_from_pdf,score_resume_with_openrouter
|
||||
|
||||
|
||||
|
||||
@receiver(post_save, sender=models.Candidate)
def score_candidate_resume(sender, instance, created, **kwargs):
    """Score a candidate's resume after the candidate has been saved.

    The resume text is extracted from the uploaded file, sent to
    ``score_resume_with_openrouter``, and the returned values are written to
    the candidate's ``match_score``, ``strengths``, ``weaknesses`` and
    ``criteria_checklist`` fields.

    Scoring is skipped when the candidate has no resume, when the save was a
    partial one (``update_fields`` given), when the file is missing on disk,
    or when the scorer returns no usable result. Failures are logged and
    never propagated to the code that saved the candidate.

    Args:
        sender: The model class that sent the signal.
        instance: The candidate that was saved.
        created: True when the row was just inserted (unused).
        **kwargs: Remaining signal arguments; only ``update_fields`` is read.
    """
    # An empty file field is a falsy object rather than None, so an identity
    # check against None would never skip candidates without a resume.
    if not instance.resume:
        return

    # Partial saves, including the one issued at the end of this handler,
    # pass update_fields. Ignoring them prevents endless post_save recursion.
    if kwargs.get('update_fields') is not None:
        return

    # NOTE: every full save of a candidate with a resume triggers a re-score;
    # change detection (e.g. a file hash) is not implemented.
    try:
        file_path = instance.resume.path
        if not os.path.exists(file_path):
            logger.warning(f"Resume file not found: {file_path}")
            return

        resume_text = extract_text_from_pdf(file_path)
        result = score_resume_with_openrouter(resume_text)

        # Do not overwrite existing values with an empty or malformed result.
        if not isinstance(result, dict) or not result:
            logger.warning(f"No scoring result for candidate {instance.id}")
            return

        # `or` fallbacks also cover keys that are present but null, which
        # would otherwise put None into non-nullable columns.
        instance.match_score = result.get('match_score')
        instance.strengths = result.get('strengths') or ''
        instance.weaknesses = result.get('weaknesses') or ''
        instance.criteria_checklist = result.get('criteria_checklist') or {}

        # Save only the scoring fields; see the update_fields guard above.
        instance.save(update_fields=[
            'match_score', 'strengths', 'weaknesses',
            'criteria_checklist',
        ])

        logger.info(f"Successfully scored resume for candidate {instance.id}")

    except Exception as e:
        # Signal-handler boundary: a scoring failure must not break the save
        # that triggered it. logger.exception keeps the traceback.
        logger.exception(f"Failed to score resume for candidate {instance.id}: {e}")
|
||||
|
||||
|
||||
@ -1,32 +1,124 @@
|
||||
import os
|
||||
import fitz # PyMuPDF
|
||||
import spacy
|
||||
import requests
|
||||
# import os
|
||||
# import fitz # PyMuPDF
|
||||
# import spacy
|
||||
# import requests
|
||||
from recruitment import models
|
||||
from django.conf import settings
|
||||
|
||||
nlp = spacy.load("en_core_web_sm")
|
||||
# nlp = spacy.load("en_core_web_sm")
|
||||
|
||||
def extract_text_from_pdf(pdf_path):
|
||||
# def extract_text_from_pdf(pdf_path):
|
||||
# text = ""
|
||||
# with fitz.open(pdf_path) as doc:
|
||||
# for page in doc:
|
||||
# text += page.get_text()
|
||||
# return text
|
||||
|
||||
# def extract_summary_from_pdf(pdf_path):
|
||||
# if not os.path.exists(pdf_path):
|
||||
# return {'error': 'File not found'}
|
||||
|
||||
# text = extract_text_from_pdf(pdf_path)
|
||||
# doc = nlp(text)
|
||||
# summary = {
|
||||
# 'name': doc.ents[0].text if doc.ents else '',
|
||||
# 'skills': [chunk.text for chunk in doc.noun_chunks if len(chunk.text.split()) > 1],
|
||||
# 'summary': text[:500]
|
||||
# }
|
||||
# return summary
|
||||
|
||||
import requests
|
||||
from PyPDF2 import PdfReader
|
||||
import os
|
||||
import json
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)

# SECURITY: credentials must not live in source control. The key is read from
# the environment; the key that used to be hard-coded here has been published
# in the repository history and should be revoked and rotated.
OPENROUTER_API_KEY = os.environ.get('OPENROUTER_API_KEY', '')

# Model identifier sent to OpenRouter; overridable through the environment,
# defaulting to the previously hard-coded value.
OPENROUTER_MODEL = os.environ.get(
    'OPENROUTER_MODEL',
    'qwen/qwen-2.5-72b-instruct:free',
)

if not OPENROUTER_API_KEY:
    logger.warning("OPENROUTER_API_KEY not set. Resume scoring will be skipped.")
|
||||
|
||||
def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page of a PDF file.

    Args:
        file_path: Filesystem path of the PDF to read.

    Returns:
        The text of all pages joined together, with leading and trailing
        whitespace stripped. Pages that yield no text contribute nothing.

    Raises:
        Exception: Any error raised while opening or parsing the file is
            logged and re-raised unchanged.
    """
    page_texts = []
    try:
        with open(file_path, "rb") as f:
            reader = PdfReader(f)
            for page in reader.pages:
                # Guard against pages for which no text comes back.
                page_texts.append(page.extract_text() or "")
    except Exception as e:
        logger.error(f"PDF extraction failed: {e}")
        raise
    return "".join(page_texts).strip()
|
||||
|
||||
def extract_summary_from_pdf(pdf_path):
|
||||
if not os.path.exists(pdf_path):
|
||||
return {'error': 'File not found'}
|
||||
def _parse_model_json(content):
    """Parse the model's reply into a dict, tolerating Markdown code fences."""
    if not isinstance(content, str):
        logger.error("OpenRouter reply has no text content")
        return {}

    # The model sometimes wraps its JSON in ```json ... ``` fences.
    cleaned = content.replace("```json", "").replace("```", "").strip()
    try:
        parsed = json.loads(cleaned)
    except ValueError as e:
        logger.error(f"Could not parse scoring JSON: {e}")
        return {}

    if not isinstance(parsed, dict):
        logger.error("Scoring reply is valid JSON but not an object")
        return {}
    return parsed


def score_resume_with_openrouter(resume_text):
    """Ask the configured OpenRouter model to score a resume.

    Args:
        resume_text: Plain text extracted from the candidate's resume.

    Returns:
        A dict parsed from the model's JSON reply; the prompt asks for the
        keys ``match_score``, ``strengths``, ``weaknesses`` and
        ``criteria_checklist``. An empty dict is returned when no API key is
        configured, when the HTTP status is not 200, or when the reply cannot
        be parsed into a JSON object.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    if not OPENROUTER_API_KEY:
        logger.warning("OPENROUTER_API_KEY not set. Skipping resume scoring.")
        return {}

    # NOTE(review): the role and job criteria are hard-coded in the prompt, so
    # every candidate is scored against the same criteria regardless of the
    # job applied for - confirm whether that is intended.
    prompt = f"""
    You are an expert technical recruiter. Your task is to score the following candidate for the role of a Senior Data Analyst based on the provided job criteria.

    **Job Criteria:**
    - Must-Have Skills: Python, SQL, 5+ years of experience.
    - Nice-to-Have Skills: Tableau, AWS.
    - Experience: Must have led at least one project.

    **Candidate's Extracted Resume Text:**
    \"\"\"
    {resume_text}
    \"\"\"

    **Your Task:**
    Provide a response in strict JSON format with the following keys:
    1. 'match_score': A score from 0 to 100 representing how well the candidate fits the role.
    2. 'strengths': A brief summary of why the candidate is a strong fit, referencing specific criteria.
    3. 'weaknesses': A brief summary of where the candidate falls short or what criteria are missing.
    4. 'criteria_checklist': An object where you rate the candidate's match for each specific criterion (e.g., {{'Python': 'Met', 'AWS': 'Not Mentioned'}}).

    Only output valid JSON. Do not include any other text.
    """

    response = requests.post(
        url="https://openrouter.ai/api/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "Content-Type": "application/json",
        },
        data=json.dumps({
            "model": OPENROUTER_MODEL,
            "messages": [{"role": "user", "content": prompt}],
        }),
        # Without a timeout a stalled connection would block the caller
        # indefinitely.
        timeout=60,
    )

    if response.status_code != 200:
        logger.error(f"OpenRouter request failed with status {response.status_code}")
        return {}

    try:
        content = response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        logger.error(f"Unexpected OpenRouter response structure: {e}")
        return {}

    # Return only the parsed scoring object, never the raw API envelope.
    return _parse_model_json(content)
|
||||
# print(f"rawraw_output)
|
||||
# print(response)
|
||||
|
||||
|
||||
|
||||
# def match_resume_with_job_description(resume, job_description,prompt=""):
|
||||
# resume_doc = nlp(resume)
|
||||
# job_doc = nlp(job_description)
|
||||
# similarity = resume_doc.similarity(job_doc)
|
||||
# return similarity
|
||||
|
||||
def dashboard_callback(request, context):
|
||||
total_jobs = models.Job.objects.count()
|
||||
|
||||
BIN
static/media/resumes/resume_juanjosecarin.pdf
Normal file
BIN
static/media/resumes/resume_juanjosecarin.pdf
Normal file
Binary file not shown.
BIN
static/media/resumes/resume_juanjosecarin_2pTre0O.pdf
Normal file
BIN
static/media/resumes/resume_juanjosecarin_2pTre0O.pdf
Normal file
Binary file not shown.
BIN
static/media/resumes/resume_juanjosecarin_4LEMHNf.pdf
Normal file
BIN
static/media/resumes/resume_juanjosecarin_4LEMHNf.pdf
Normal file
Binary file not shown.
BIN
static/media/resumes/resume_juanjosecarin_83fjxF9.pdf
Normal file
BIN
static/media/resumes/resume_juanjosecarin_83fjxF9.pdf
Normal file
Binary file not shown.
BIN
static/media/resumes/resume_juanjosecarin_BJ9ugt1.pdf
Normal file
BIN
static/media/resumes/resume_juanjosecarin_BJ9ugt1.pdf
Normal file
Binary file not shown.
BIN
static/media/resumes/resume_juanjosecarin_BRHciWm.pdf
Normal file
BIN
static/media/resumes/resume_juanjosecarin_BRHciWm.pdf
Normal file
Binary file not shown.
BIN
static/media/resumes/resume_juanjosecarin_F5l3AkI.pdf
Normal file
BIN
static/media/resumes/resume_juanjosecarin_F5l3AkI.pdf
Normal file
Binary file not shown.
BIN
static/media/resumes/resume_juanjosecarin_H0J04WR.pdf
Normal file
BIN
static/media/resumes/resume_juanjosecarin_H0J04WR.pdf
Normal file
Binary file not shown.
BIN
static/media/resumes/resume_juanjosecarin_RmEZple.pdf
Normal file
BIN
static/media/resumes/resume_juanjosecarin_RmEZple.pdf
Normal file
Binary file not shown.
BIN
static/media/resumes/resume_juanjosecarin_T5yg0Gh.pdf
Normal file
BIN
static/media/resumes/resume_juanjosecarin_T5yg0Gh.pdf
Normal file
Binary file not shown.
BIN
static/media/resumes/resume_juanjosecarin_bXsV4rl.pdf
Normal file
BIN
static/media/resumes/resume_juanjosecarin_bXsV4rl.pdf
Normal file
Binary file not shown.
BIN
static/media/resumes/resume_juanjosecarin_k9oYeu8.pdf
Normal file
BIN
static/media/resumes/resume_juanjosecarin_k9oYeu8.pdf
Normal file
Binary file not shown.
BIN
static/media/resumes/resume_juanjosecarin_lyYmurS.pdf
Normal file
BIN
static/media/resumes/resume_juanjosecarin_lyYmurS.pdf
Normal file
Binary file not shown.
BIN
static/media/resumes/resume_juanjosecarin_pDQB73d.pdf
Normal file
BIN
static/media/resumes/resume_juanjosecarin_pDQB73d.pdf
Normal file
Binary file not shown.
BIN
static/media/resumes/resume_juanjosecarin_qCLGCKw.pdf
Normal file
BIN
static/media/resumes/resume_juanjosecarin_qCLGCKw.pdf
Normal file
Binary file not shown.
BIN
static/media/resumes/resume_juanjosecarin_xg0Feln.pdf
Normal file
BIN
static/media/resumes/resume_juanjosecarin_xg0Feln.pdf
Normal file
Binary file not shown.
BIN
static/media/resumes/resume_juanjosecarin_yedHjGr.pdf
Normal file
BIN
static/media/resumes/resume_juanjosecarin_yedHjGr.pdf
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user