Skip to main content

SQL Commands for Django Development

sqlite3 api/db.sqlite3

Database Inspection

Basic Table Information

-- List all tables
.tables                              -- SQLite
SELECT name FROM sqlite_master WHERE type='table';

-- Describe table structure
.schema table_name                   -- SQLite
PRAGMA table_info(table_name);       -- SQLite detailed info

-- Show table creation SQL
.schema auth_user                    -- Example for Django's User table

Django-Specific Tables

-- Core Django tables
SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'django_%';

-- Your app tables (scraper)
SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'scraper_%';

-- Auth tables
SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'auth_%';

-- Session tables
SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'django_session%';

User Management

User Queries

-- List all users
SELECT id, username, email, is_active, is_staff, date_joined
FROM auth_user;

-- Find specific user
SELECT * FROM auth_user WHERE username = 'devuser';

-- Users with subscriptions
SELECT u.username, u.email, s.tier, s.monthly_requests_limit, s.monthly_requests_used
FROM auth_user u
LEFT JOIN scraper_usersubscription s ON u.id = s.user_id;

-- Active users only
SELECT username, email, last_login
FROM auth_user
WHERE is_active = 1
ORDER BY last_login DESC;

-- Staff users
SELECT username, email, is_superuser
FROM auth_user
WHERE is_staff = 1;

User Analysis

-- User registration timeline
SELECT DATE(date_joined) as date, COUNT(*) as registrations
FROM auth_user
GROUP BY DATE(date_joined)
ORDER BY date DESC;

-- Users by tier
SELECT s.tier, COUNT(*) as count
FROM scraper_usersubscription s
GROUP BY s.tier;

-- Inactive users
SELECT username, email, last_login, date_joined
FROM auth_user
WHERE is_active = 0 OR last_login IS NULL;

Application Data

Applications & Data Sources

-- All applications with user info
SELECT u.username, a.name, a.slug, a.status, a.created_at
FROM scraper_application a
JOIN auth_user u ON a.user_id = u.id
ORDER BY a.created_at DESC;

-- Applications with data source counts
SELECT a.name, a.slug, COUNT(ads.id) as data_sources
FROM scraper_application a
LEFT JOIN scraper_applicationdatasource ads ON a.id = ads.application_id
GROUP BY a.id, a.name, a.slug;

-- Data sources with platform info
SELECT a.name as app_name, p.name as platform_name,
       ads.source_alias, ads.is_primary, ads.sync_enabled
FROM scraper_applicationdatasource ads
JOIN scraper_application a ON ads.application_id = a.id
JOIN scraper_platformdataset pd ON ads.platform_dataset_id = pd.id
JOIN scraper_platform p ON pd.platform_id = p.id;

Platform Data

-- Platforms and their datasets
SELECT p.name, p.platform_type, COUNT(pd.id) as datasets
FROM scraper_platform p
LEFT JOIN scraper_platformdataset pd ON p.id = pd.platform_id
GROUP BY p.id, p.name;

-- Platform access by user
SELECT u.username, p.name, pa.datasets_limit, pa.can_export, pa.can_use_analytics
FROM scraper_platformaccess pa
JOIN scraper_usersubscription s ON pa.subscription_id = s.id
JOIN auth_user u ON s.user_id = u.id
JOIN scraper_platform p ON pa.platform_id = p.id;

Usage Analytics

API Usage

-- API usage by user (if you add user tracking to APILog)
SELECT DATE(timestamp) as date, COUNT(*) as requests
FROM api_logs
GROUP BY DATE(timestamp)
ORDER BY date DESC;

-- Most accessed endpoints
SELECT endpoint, COUNT(*) as hits, AVG(response_time_ms) as avg_response
FROM api_logs
GROUP BY endpoint
ORDER BY hits DESC;

-- Error analysis
SELECT status_code, COUNT(*) as count
FROM api_logs
WHERE status_code >= 400
GROUP BY status_code
ORDER BY count DESC;

-- Slow endpoints
SELECT endpoint, AVG(response_time_ms) as avg_time, COUNT(*) as requests
FROM api_logs
WHERE response_time_ms > 1000
GROUP BY endpoint
ORDER BY avg_time DESC;

Subscription Usage

-- Users approaching limits
SELECT u.username, s.tier, s.monthly_requests_used, s.monthly_requests_limit,
       ROUND((s.monthly_requests_used * 100.0 / s.monthly_requests_limit), 2) as usage_percent
FROM scraper_usersubscription s
JOIN auth_user u ON s.user_id = u.id
WHERE s.monthly_requests_used > (s.monthly_requests_limit * 0.8)
ORDER BY usage_percent DESC;

-- Tier utilization
SELECT tier,
       AVG(monthly_requests_used) as avg_usage,
       AVG(monthly_requests_limit) as avg_limit,
       COUNT(*) as users
FROM scraper_usersubscription
GROUP BY tier;

Cache Management

Cache Analysis

-- Cache overview
SELECT
    CASE
        WHEN cache_key LIKE '%keyword%' THEN 'Keywords'
        WHEN cache_key LIKE '%feature%' THEN 'Features'
        WHEN cache_key LIKE '%seo%' THEN 'SEO'
        ELSE 'Other'
    END as cache_type,
    COUNT(*) as entries,
    AVG(julianday('now') - julianday(updated_at)) as avg_age_days
FROM notion_cache
GROUP BY cache_type;

-- Stale cache entries (older than 1 hour)
SELECT cache_key, updated_at,
       ROUND((julianday('now') - julianday(updated_at)) * 24, 2) as hours_old
FROM notion_cache
WHERE datetime(updated_at) < datetime('now', '-1 hour')
ORDER BY updated_at;

-- Large cache entries
SELECT cache_key, LENGTH(json(data)) as size_bytes, updated_at
FROM notion_cache
ORDER BY size_bytes DESC
LIMIT 10;

Data Integrity Checks

Orphaned Records

-- Applications without users
SELECT a.* FROM scraper_application a
LEFT JOIN auth_user u ON a.user_id = u.id
WHERE u.id IS NULL;

-- Data sources without applications
SELECT ads.* FROM scraper_applicationdatasource ads
LEFT JOIN scraper_application a ON ads.application_id = a.id
WHERE a.id IS NULL;

-- Subscriptions without users
SELECT s.* FROM scraper_usersubscription s
LEFT JOIN auth_user u ON s.user_id = u.id
WHERE u.id IS NULL;

Consistency Checks

-- Users without subscriptions
SELECT u.username, u.email FROM auth_user u
LEFT JOIN scraper_usersubscription s ON u.id = s.user_id
WHERE s.id IS NULL AND u.is_active = 1;

-- Inactive subscriptions for active users
SELECT u.username, s.tier, s.is_active as sub_active, u.is_active as user_active
FROM auth_user u
JOIN scraper_usersubscription s ON u.id = s.user_id
WHERE u.is_active = 1 AND s.is_active = 0;

Development Helpers

Quick Data Resets

-- Reset user API usage
UPDATE scraper_usersubscription SET monthly_requests_used = 0;

-- Reset specific user usage
UPDATE scraper_usersubscription
SET monthly_requests_used = 0
WHERE user_id = (SELECT id FROM auth_user WHERE username = 'devuser');

-- Clear old cache entries
DELETE FROM notion_cache
WHERE datetime(updated_at) < datetime('now', '-24 hours');

-- Clear old API logs
DELETE FROM api_logs
WHERE datetime(timestamp) < datetime('now', '-7 days');

Data Generation for Testing

-- Insert test API logs
INSERT INTO api_logs (endpoint, method, ip_address, user_agent, response_time_ms, status_code)
VALUES
    ('/scrape/apps/', 'GET', '127.0.0.1', 'TestClient', 150, 200),
    ('/scrape/auth/login/', 'POST', '127.0.0.1', 'TestClient', 300, 200),
    ('/scrape/platforms/', 'GET', '127.0.0.1', 'TestClient', 1200, 200);

-- Simulate API usage
UPDATE scraper_usersubscription
SET monthly_requests_used = monthly_requests_used + 50
WHERE user_id = (SELECT id FROM auth_user WHERE username = 'devuser');

Useful Joins & Complex Queries

User Activity Summary

SELECT
    u.username,
    u.is_active,
    s.tier,
    s.monthly_requests_used,
    s.monthly_requests_limit,
    COUNT(DISTINCT a.id) as app_count,
    COUNT(DISTINCT ads.id) as data_source_count
FROM auth_user u
LEFT JOIN scraper_usersubscription s ON u.id = s.user_id
LEFT JOIN scraper_application a ON u.id = a.user_id
LEFT JOIN scraper_applicationdatasource ads ON a.id = ads.application_id
GROUP BY u.id, u.username
ORDER BY app_count DESC;

Platform Usage Analysis

SELECT
    p.name as platform,
    COUNT(DISTINCT u.id) as unique_users,
    COUNT(DISTINCT a.id) as applications,
    COUNT(DISTINCT ads.id) as data_sources
FROM scraper_platform p
JOIN scraper_platformdataset pd ON p.id = pd.platform_id
JOIN scraper_applicationdatasource ads ON pd.id = ads.platform_dataset_id
JOIN scraper_application a ON ads.application_id = a.id
JOIN auth_user u ON a.user_id = u.id
GROUP BY p.id, p.name;

SQLite-Specific Commands

Database Management

-- Database info
PRAGMA database_list;
PRAGMA page_count;
PRAGMA page_size;

-- Vacuum database (reclaim space)
VACUUM;

-- Analyze database (update stats)
ANALYZE;

-- Check integrity
PRAGMA integrity_check;

-- Show indexes
.indexes table_name

-- Explain query plan
EXPLAIN QUERY PLAN SELECT * FROM auth_user WHERE username = 'devuser';

Performance Tuning

-- Create indexes for common queries
CREATE INDEX idx_auth_user_username ON auth_user(username);
CREATE INDEX idx_api_logs_timestamp ON api_logs(timestamp);
CREATE INDEX idx_notion_cache_updated ON notion_cache(updated_at);

-- Drop index
DROP INDEX idx_name;

-- Show query execution time
.timer ON
SELECT COUNT(*) FROM auth_user;

Quick Reference Commands

Access SQLite DB

# From your project root
sqlite3 api/db.sqlite3

# Or via Django
./dj.sh shell -c "from django.db import connection; print(connection.settings_dict)"

Common Django Shell Queries

# In Django shell (./dj.sh shell)
from django.contrib.auth.models import User
from core.models import UserSubscription
from core.models import Application

# Quick user lookup
User.objects.get(username='devuser')

# Users with high API usage
UserSubscription.objects.filter(monthly_requests_used__gt=1000)

# Applications created today
from django.utils import timezone
Application.objects.filter(created_at__date=timezone.now().date())

Backup & Restore

# Backup database
sqlite3 api/db.sqlite3 ".backup backup_$(date +%Y%m%d_%H%M%S).db"

# Restore database
cp backup_20240909_103000.db api/db.sqlite3

# Export as SQL
sqlite3 api/db.sqlite3 ".dump" > backup.sql

# Import from SQL
sqlite3 new_db.sqlite3 < backup.sql
This reference covers the most common SQL operations you’ll need during development, from basic queries to performance analysis and debugging.