# HH/apps/social/tasks/linkedin.py
from celery import shared_task
from django.utils import timezone
from django.db import transaction
from apps.social.models import SocialAccount, SocialContent, SocialComment
from apps.social.services.linkedin import LinkedInService, LinkedInAPIError
from apps.social.utils.linkedin import LinkedInConstants
import logging
import time

logger = logging.getLogger(__name__)

# ==========================================
# INITIAL HISTORICAL SYNC (One-Time)
# ==========================================
@shared_task(bind=True, max_retries=3)
def initial_historical_sync_task(self, account_id, max_comments_per_post=None):
    """
    ONE-TIME initial historical sync when a client first connects.
    Fetches posts and a limited number of historical comments to bootstrap
    the database.
    """
    if max_comments_per_post is None:
        max_comments_per_post = LinkedInConstants.INITIAL_SYNC_COMMENT_LIMIT
    try:
        # Note: select_for_update() requires an enclosing transaction, and
        # holding a row lock for a long-running task is impractical, so a
        # plain get() is used here.
        account = SocialAccount.objects.get(id=account_id, platform_type='LI')

        # Check token validity
        if account.is_token_expired():
            account.is_active = False
            account.save()
            logger.error(f"Account {account.name} token expired")
            return

        logger.info(f"Starting initial historical sync for {account.name}")

        # Fetch all posts (returns full URNs)
        posts_data = LinkedInService.fetch_posts(account, count=100)
        logger.info(f"Found {len(posts_data)} posts for {account.name}")

        total_comments_synced = 0
        for idx, post_data in enumerate(posts_data, 1):
            post_urn = post_data.get('id')
            if not post_urn:
                logger.warning(f"Post missing ID, skipping: {post_data}")
                continue

            # Extract post data
            commentary = post_data.get('commentary', '')
            created_time_ms = post_data.get('created', {}).get('time')
            created_at = LinkedInService._parse_timestamp(created_time_ms)

            # Create or update the post
            with transaction.atomic():
                post, created = SocialContent.objects.get_or_create(
                    platform_type='LI',
                    content_id=post_urn,
                    defaults={
                        'account': account,
                        'text': commentary,
                        'created_at': created_at,
                        'content_data': post_data,
                        'last_comment_sync_at': None
                    }
                )

            if not created and post.last_comment_sync_at:
                logger.info(f"Post {post_urn} already synced, skipping")
                continue

            logger.info(f"Processing post {idx}/{len(posts_data)}: {post_urn}")

            # Fetch LIMITED historical comments using the full URN
            comments_data = LinkedInService.fetch_comments_limited(
                account,
                post_urn,
                limit=max_comments_per_post
            )
            logger.info(f"Found {len(comments_data)} comments for post {post_urn}")

            latest_comment_time = created_at

            # Save comments in a single transaction
            with transaction.atomic():
                for c_data in comments_data:
                    comment_id = c_data.get('id')
                    if not comment_id:
                        continue
                    c_time_ms = c_data.get('created', {}).get('time')
                    c_time = LinkedInService._parse_timestamp(c_time_ms)
                    actor = c_data.get('actor', 'Unknown')
                    message_text = c_data.get('message', {}).get('text', '')
                    SocialComment.objects.update_or_create(
                        platform_type='LI',
                        comment_id=comment_id,
                        defaults={
                            'account': account,
                            'content': post,
                            'author_name': actor,
                            'author_id': actor,
                            'text': message_text,
                            'created_at': c_time,
                            'synced_via_webhook': False,
                            'comment_data': c_data
                        }
                    )
                    # Guard against unparseable (None) timestamps
                    if c_time and (latest_comment_time is None or c_time > latest_comment_time):
                        latest_comment_time = c_time
                    total_comments_synced += 1

                # Update the post's last sync timestamp
                post.last_comment_sync_at = latest_comment_time
                post.save()

            # Rate limit protection
            time.sleep(0.5)

        # Mark account as synced
        account.last_synced_at = timezone.now()
        account.save()
        logger.info(
            f"✅ Initial historical sync complete for {account.name}: "
            f"{len(posts_data)} posts, {total_comments_synced} comments"
        )
    except SocialAccount.DoesNotExist:
        logger.error(f"Account {account_id} not found")
    except Exception as e:
        logger.error(f"Initial historical sync error for account {account_id}: {e}")
        # Retry with exponential backoff (60s, 120s, 240s)
        raise self.retry(exc=e, countdown=60 * (2 ** self.request.retries))
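
# Illustrative only: this task is meant to be queued exactly once, right after
# a client connects their LinkedIn account. A hedged sketch of the call site
# (the view and helper below are hypothetical, not part of this codebase):
#
# def linkedin_connect_callback(request):
#     account = finish_linkedin_oauth(request)  # hypothetical helper
#     initial_historical_sync_task.delay(account.id)
#     return redirect('dashboard')
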
# ==========================================
# DELTA SYNC FOR NEW COMMENTS (Scheduled)
# ==========================================
@shared_task(bind=True, max_retries=3)
def sync_new_comments_task(self, account_id):
    """
    DELTA SYNC that fetches only NEW comments since the last sync.
    """
    try:
        # See note in initial_historical_sync_task on why select_for_update()
        # is not used here.
        account = SocialAccount.objects.get(id=account_id, platform_type='LI')

        # Check token validity
        if account.is_token_expired():
            account.is_active = False
            account.save()
            logger.error(f"Account {account.name} token expired")
            return

        # Check whether the account has been initially synced
        if not account.last_synced_at:
            logger.warning(
                f"Account {account.name} not initially synced. "
                f"Run initial_historical_sync_task first."
            )
            return

        logger.info(f"Starting delta sync for new comments: {account.name}")

        # Fetch posts
        posts_data = LinkedInService.fetch_posts(account, count=100)
        total_new_comments = 0

        for post_data in posts_data:
            post_urn = post_data.get('id')
            if not post_urn:
                continue

            # Get or create the post record
            try:
                post = SocialContent.objects.get(
                    platform_type='LI',
                    content_id=post_urn
                )
            except SocialContent.DoesNotExist:
                # This post wasn't in the initial sync; create it
                commentary = post_data.get('commentary', '')
                created_time_ms = post_data.get('created', {}).get('time')
                created_at = LinkedInService._parse_timestamp(created_time_ms)
                post = SocialContent.objects.create(
                    platform_type='LI',
                    content_id=post_urn,
                    account=account,
                    text=commentary,
                    created_at=created_at,
                    content_data=post_data,
                    last_comment_sync_at=None
                )

            # Fetch only NEW comments since the last sync, using the full URN
            since_timestamp = post.last_comment_sync_at
            comments_data = LinkedInService.fetch_comments_delta(
                account,
                post_urn,
                since_timestamp=since_timestamp
            )
            if not comments_data:
                continue

            logger.info(f"Found {len(comments_data)} new comments for post {post_urn}")
            latest_comment_time = since_timestamp

            # Save new comments
            with transaction.atomic():
                for c_data in comments_data:
                    comment_id = c_data.get('id')
                    if not comment_id:
                        continue
                    c_time_ms = c_data.get('created', {}).get('time')
                    c_time = LinkedInService._parse_timestamp(c_time_ms)
                    actor = c_data.get('actor', 'Unknown')
                    message_text = c_data.get('message', {}).get('text', '')
                    comment, created = SocialComment.objects.update_or_create(
                        platform_type='LI',
                        comment_id=comment_id,
                        defaults={
                            'account': account,
                            'content': post,
                            'author_name': actor,
                            'author_id': actor,
                            'text': message_text,
                            'created_at': c_time,
                            'synced_via_webhook': False,
                            'comment_data': c_data
                        }
                    )
                    if created:
                        total_new_comments += 1
                    if c_time and (latest_comment_time is None or c_time > latest_comment_time):
                        latest_comment_time = c_time

            # Advance the sync timestamp only if a newer comment was seen;
            # defaulting to timezone.now() would overstate how far we synced
            if latest_comment_time and latest_comment_time != post.last_comment_sync_at:
                post.last_comment_sync_at = latest_comment_time
                post.save()

            time.sleep(0.3)

        # Update account sync time
        account.last_synced_at = timezone.now()
        account.save()
        logger.info(
            f"✅ Delta sync complete for {account.name}: "
            f"{total_new_comments} new comments"
        )
    except SocialAccount.DoesNotExist:
        logger.error(f"Account {account_id} not found")
    except Exception as e:
        logger.error(f"Delta sync error for account {account_id}: {e}")
        # Retry with exponential backoff (60s, 120s, 240s)
        raise self.retry(exc=e, countdown=60 * (2 ** self.request.retries))
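
# For reference, fetch_comments_delta is assumed to filter comments by creation
# time. A rough sketch of that contract (not the actual service code, which
# lives in apps/social/services/linkedin.py):
#
# def fetch_comments_delta(account, post_urn, since_timestamp=None):
#     comments = fetch_all_comments(account, post_urn)  # hypothetical helper
#     if since_timestamp is None:
#         return comments
#     since_ms = int(since_timestamp.timestamp() * 1000)
#     return [c for c in comments
#             if c.get('created', {}).get('time', 0) > since_ms]
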
# ==========================================
# WEBHOOK PROCESSING (Real-Time)
# ==========================================
@shared_task(bind=True, max_retries=3)
def process_webhook_comment_task(self, account_id, post_urn, comment_id):
    """
    Process a single comment triggered by a webhook (REAL-TIME sync).
    post_urn: full URN expected (e.g., urn:li:share:123).
    """
    try:
        account = SocialAccount.objects.get(id=account_id, platform_type='LI')
        logger.info(f"Processing webhook comment {comment_id} for post {post_urn}")

        # Fetch the specific comment (most efficient)
        comment_data = LinkedInService.fetch_single_comment(account, post_urn, comment_id)
        if not comment_data:
            logger.warning(
                f"Could not fetch comment {comment_id} for post {post_urn}. "
                f"It may have been deleted."
            )
            return

        # Get or create the post
        try:
            content = SocialContent.objects.get(
                platform_type='LI',
                content_id=post_urn
            )
        except SocialContent.DoesNotExist:
            logger.warning(f"Post {post_urn} not found in database, creating it")
            # The post doesn't exist locally; look for it among recent posts
            # (count=1 would only ever match the newest post)
            posts_data = LinkedInService.fetch_posts(account, count=100)
            matching_post = next((p for p in posts_data if p.get('id') == post_urn), None)
            if matching_post:
                commentary = matching_post.get('commentary', '')
                created_time_ms = matching_post.get('created', {}).get('time')
                created_at = LinkedInService._parse_timestamp(created_time_ms)
                content = SocialContent.objects.create(
                    platform_type='LI',
                    content_id=post_urn,
                    account=account,
                    text=commentary,
                    created_at=created_at,
                    content_data=matching_post
                )
            else:
                logger.error(f"Could not fetch post {post_urn} from API")
                return

        # Parse comment data
        c_time_ms = comment_data.get('created', {}).get('time')
        c_time = LinkedInService._parse_timestamp(c_time_ms)
        actor = comment_data.get('actor', 'Unknown')
        message_text = comment_data.get('message', {}).get('text', '')

        # Save or update the comment
        with transaction.atomic():
            comment, created = SocialComment.objects.update_or_create(
                platform_type='LI',
                comment_id=comment_id,
                defaults={
                    'account': account,
                    'content': content,
                    'author_name': actor,
                    'author_id': actor,
                    'text': message_text,
                    'created_at': c_time,
                    'synced_via_webhook': True,
                    'comment_data': comment_data
                }
            )

            # Update the post's last comment sync time
            if c_time and (content.last_comment_sync_at is None or c_time > content.last_comment_sync_at):
                content.last_comment_sync_at = c_time
                content.save()

        action = "Created" if created else "Updated"
        logger.info(f"{action} comment {comment_id} via webhook")
    except SocialAccount.DoesNotExist:
        logger.error(f"Account {account_id} not found for webhook processing")
    except Exception as e:
        logger.error(f"Webhook processing error: {e}")
        # Retry with exponential backoff (30s, 60s, 120s)
        raise self.retry(exc=e, countdown=30 * (2 ** self.request.retries))
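
# Illustrative only: a hedged sketch of the webhook endpoint that would queue
# this task. The payload field names below are assumptions and must be checked
# against LinkedIn's actual webhook documentation; the usual Django imports
# (json, HttpResponse, csrf_exempt) are omitted:
#
# @csrf_exempt
# def linkedin_comment_webhook(request):
#     payload = json.loads(request.body)
#     process_webhook_comment_task.delay(
#         account_id=resolve_account_id(payload),  # hypothetical helper
#         post_urn=payload['postUrn'],             # assumed field name
#         comment_id=payload['commentId'],         # assumed field name
#     )
#     return HttpResponse(status=200)
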
# ==========================================
# BULK OPERATIONS
# ==========================================
@shared_task
def sync_all_accounts_task():
    """
    Wrapper that syncs ALL active accounts (for a scheduled Celery Beat job).
    Schedule it with Celery Beat to run every 15-30 minutes; a sample schedule
    is sketched below.
    """
    accounts = SocialAccount.objects.filter(
        platform_type='LI',
        is_active=True,
        last_synced_at__isnull=False  # Only accounts that completed the initial sync
    )
    logger.info(f"Starting delta sync for {accounts.count()} LinkedIn accounts")
    for account in accounts:
        # Queue each account sync as a separate task
        sync_new_comments_task.delay(account.id)
    logger.info(f"Queued delta sync tasks for {accounts.count()} accounts")
# @shared_task
# def cleanup_old_comments_task(days_to_keep=90):
#     """Optional: clean up very old comments to save database space."""
#     cutoff_date = timezone.now() - timezone.timedelta(days=days_to_keep)
#     deleted_count, _ = SocialComment.objects.filter(
#         platform_type='LI',
#         created_at__lt=cutoff_date
#     ).delete()
#     logger.info(f"Cleaned up {deleted_count} LinkedIn comments older than {days_to_keep} days")