PostgreSQL

Queries & Joins

Queries & Joins Basic SELECT -- Basic query SELECT id, name, email FROM users WHERE active = true ORDER BY name LIMIT 10 OFFSET 20; -- Aliases and expressions S…

Queries & Joins

Basic SELECT

-- Basic query
SELECT id, name, email FROM users WHERE active = true ORDER BY name LIMIT 10 OFFSET 20;

-- Aliases and expressions
SELECT
  first_name || ' ' || last_name AS full_name,
  EXTRACT(YEAR FROM created_at) AS year,
  COALESCE(phone, 'N/A') AS phone
FROM users;

-- Distinct
SELECT DISTINCT department FROM employees;

-- Aggregate functions
SELECT
  department,
  COUNT(*) AS total,
  AVG(salary) AS avg_salary,
  MAX(salary) AS max_salary
FROM employees
GROUP BY department
HAVING COUNT(*) > 5
ORDER BY avg_salary DESC;

JOIN Types

-- INNER JOIN — only matching rows
SELECT u.name, o.total
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

-- LEFT JOIN — all from left, NULLs for unmatched right
SELECT u.name, COUNT(o.id) AS order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name;

-- RIGHT JOIN — all from right
SELECT u.name, o.total
FROM orders o
RIGHT JOIN users u ON o.user_id = u.id;

-- FULL OUTER JOIN — all rows from both
SELECT u.name, o.total
FROM users u
FULL OUTER JOIN orders o ON u.id = o.user_id;

-- CROSS JOIN — cartesian product
SELECT p.name, c.name AS color
FROM products p
CROSS JOIN colors c;

-- Self join
SELECT e.name AS employee, m.name AS manager
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.id;

Subqueries

-- Subquery in WHERE
SELECT name FROM users
WHERE id IN (SELECT user_id FROM orders WHERE total > 100);

-- Correlated subquery
SELECT name, salary
FROM employees e
WHERE salary > (
  SELECT AVG(salary) FROM employees WHERE department = e.department
);

-- EXISTS
SELECT name FROM users u
WHERE EXISTS (
  SELECT 1 FROM orders WHERE user_id = u.id AND status = 'pending'
);

-- Subquery in FROM (derived table)
SELECT dept, avg_salary
FROM (
  SELECT department AS dept, AVG(salary) AS avg_salary
  FROM employees
  GROUP BY department
) sub
WHERE avg_salary > 50000;

CTEs (Common Table Expressions)

-- Basic CTE
WITH recent_orders AS (
  SELECT user_id, COUNT(*) AS cnt
  FROM orders
  WHERE created_at > NOW() - INTERVAL '30 days'
  GROUP BY user_id
)
SELECT u.name, ro.cnt
FROM users u
JOIN recent_orders ro ON u.id = ro.user_id;

-- Multiple CTEs
WITH
  active_users AS (SELECT id FROM users WHERE active = true),
  user_orders AS (SELECT user_id, SUM(total) AS revenue FROM orders GROUP BY user_id)
SELECT u.id, uo.revenue
FROM active_users u
JOIN user_orders uo ON u.id = uo.user_id;

-- Recursive CTE — org chart / tree traversal
WITH RECURSIVE org_tree AS (
  -- Base case: top-level employees
  SELECT id, name, manager_id, 0 AS depth
  FROM employees WHERE manager_id IS NULL
  UNION ALL
  -- Recursive case
  SELECT e.id, e.name, e.manager_id, ot.depth + 1
  FROM employees e
  JOIN org_tree ot ON e.manager_id = ot.id
)
SELECT * FROM org_tree ORDER BY depth, name;

Useful Functions

-- String functions
SELECT
  UPPER(name), LOWER(email),
  LENGTH(name),
  SUBSTRING(email FROM 1 FOR 5),
  TRIM('  hello  '),
  REPLACE(name, 'Ltd', 'Limited'),
  CONCAT(first_name, ' ', last_name),
  SPLIT_PART(email, '@', 2) AS domain
FROM users;

-- Date functions
SELECT
  NOW(), CURRENT_DATE, CURRENT_TIME,
  AGE(NOW(), created_at) AS account_age,
  DATE_TRUNC('month', created_at) AS month,
  TO_CHAR(created_at, 'YYYY-MM-DD') AS formatted
FROM users;

-- Conditional
SELECT
  name,
  CASE
    WHEN salary > 100000 THEN 'senior'
    WHEN salary > 60000 THEN 'mid'
    ELSE 'junior'
  END AS level,
  NULLIF(phone, '') AS phone,  -- returns NULL if empty string
  GREATEST(a, b, c),
  LEAST(a, b, c)
FROM employees;

PostgreSQL

Indexes & Performance

Indexes & Performance Index Types B-tree — default, great for =, <, >, BETWEEN, LIKE prefix Hash — only equality (=), faster than B-tree for exact lookups GIN —…

Indexes & Performance

Index Types

B-tree — default, great for =, <, >, BETWEEN, LIKE prefix
Hash — only equality (=), faster than B-tree for exact lookups
GIN — arrays, JSONB, full-text search, @> operator
GiST — geometric data, range types, full-text search
BRIN — very large tables with natural order (timestamps), tiny size

Creating Indexes

-- Basic index
CREATE INDEX idx_users_email ON users(email);

-- Unique index
CREATE UNIQUE INDEX idx_users_email_unique ON users(email);

-- Composite index — order matters (leftmost prefix rule)
CREATE INDEX idx_orders_user_status ON orders(user_id, status);

-- Partial index — only index rows matching condition
CREATE INDEX idx_orders_pending ON orders(created_at)
WHERE status = 'pending';

-- Expression index
CREATE INDEX idx_users_lower_email ON users(LOWER(email));

-- GIN index for JSONB
CREATE INDEX idx_products_attrs ON products USING GIN(attributes);

-- GIN for full-text search
CREATE INDEX idx_articles_fts ON articles
USING GIN(to_tsvector('english', title || ' ' || body));

-- Concurrent creation (no table lock!)
CREATE INDEX CONCURRENTLY idx_users_name ON users(name);

-- Drop index
DROP INDEX CONCURRENTLY idx_users_name;

EXPLAIN ANALYZE

-- Basic explain
EXPLAIN SELECT * FROM users WHERE email = 'user@example.com';

-- With actual execution stats
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT u.name, COUNT(o.id)
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id;

-- Key nodes to understand:
-- Seq Scan     — full table scan (bad for large tables)
-- Index Scan   — uses index, fetches heap rows
-- Index Only Scan — all data in index (best)
-- Nested Loop  — for small datasets / indexed lookups
-- Hash Join    — for larger datasets
-- Merge Join   — sorted inputs

-- Cost format: (startup cost..total cost) rows=N width=N
-- Actual time: actual time=start..end rows=N loops=N

Query Optimization Tips

Avoid SELECT * — fetch only needed columns
Use LIMIT with ORDER BY for pagination instead of OFFSET for large pages
Index foreign key columns to speed up JOINs
Use partial indexes for filtered queries on large tables
Run ANALYZE to update planner statistics after bulk loads
Run VACUUM to reclaim space and prevent table bloat

-- Keyset pagination (fast for large offsets)
SELECT * FROM orders
WHERE id > :last_seen_id
ORDER BY id
LIMIT 20;

-- Check index usage
SELECT schemaname, tablename, indexname, idx_scan, idx_tup_read, idx_tup_fetch
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;

-- Find slow queries (requires pg_stat_statements)
SELECT query, calls, mean_exec_time, total_exec_time
FROM pg_stat_statements
ORDER BY mean_exec_time DESC
LIMIT 20;

-- Table size
SELECT
  relname AS table,
  pg_size_pretty(pg_total_relation_size(relid)) AS total_size
FROM pg_catalog.pg_statio_user_tables
ORDER BY pg_total_relation_size(relid) DESC;

PostgreSQL

Advanced Features

Advanced Features Window Functions -- ROW_NUMBER, RANK, DENSE_RANK SELECT name, department, salary, ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary D…

Advanced Features

Window Functions

-- ROW_NUMBER, RANK, DENSE_RANK
SELECT
  name,
  department,
  salary,
  ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS row_num,
  RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS rank,
  DENSE_RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS dense_rank
FROM employees;

-- Running totals and moving averages
SELECT
  date,
  amount,
  SUM(amount) OVER (ORDER BY date) AS running_total,
  AVG(amount) OVER (ORDER BY date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS moving_avg_7d
FROM daily_sales;

-- LAG and LEAD — access previous/next row
SELECT
  date,
  revenue,
  LAG(revenue, 1) OVER (ORDER BY date) AS prev_revenue,
  revenue - LAG(revenue, 1) OVER (ORDER BY date) AS change
FROM monthly_revenue;

-- NTILE — divide into buckets
SELECT name, salary, NTILE(4) OVER (ORDER BY salary) AS quartile
FROM employees;

-- FIRST_VALUE, LAST_VALUE
SELECT
  name, department, salary,
  FIRST_VALUE(name) OVER (PARTITION BY department ORDER BY salary DESC) AS top_earner
FROM employees;

JSONB

-- Creating JSONB columns
CREATE TABLE products (
  id SERIAL PRIMARY KEY,
  name TEXT,
  attributes JSONB
);

INSERT INTO products (name, attributes) VALUES
('Laptop', '{"brand": "Dell", "specs": {"ram": 16, "ssd": 512}, "tags": ["laptop", "work"]}');

-- Accessing JSON fields
SELECT attributes->>'brand' AS brand FROM products;           -- text
SELECT attributes->'specs'->>'ram' AS ram FROM products;      -- nested text
SELECT attributes->'specs'->'ram' AS ram_num FROM products;   -- numeric (JSONB)

-- Filtering
SELECT * FROM products WHERE attributes->>'brand' = 'Dell';
SELECT * FROM products WHERE (attributes->'specs'->>'ram')::int > 8;
SELECT * FROM products WHERE attributes @> '{"brand": "Dell"}';  -- containment
SELECT * FROM products WHERE attributes ? 'brand';               -- key exists
SELECT * FROM products WHERE attributes->'tags' ? 'laptop';      -- array contains

-- Updating JSONB
UPDATE products
SET attributes = jsonb_set(attributes, '{specs, ram}', '32')
WHERE id = 1;

-- Remove a key
UPDATE products
SET attributes = attributes - 'brand'
WHERE id = 1;

-- JSONB aggregation
SELECT jsonb_agg(name) AS names, jsonb_object_agg(id, name) AS id_name_map
FROM products;

Full-Text Search

-- Create FTS index
ALTER TABLE articles ADD COLUMN search_vector tsvector;
UPDATE articles SET search_vector = to_tsvector('english', title || ' ' || body);
CREATE INDEX idx_articles_search ON articles USING GIN(search_vector);

-- Search
SELECT title, ts_rank(search_vector, query) AS rank
FROM articles, to_tsquery('english', 'postgresql & performance') query
WHERE search_vector @@ query
ORDER BY rank DESC;

-- Highlight matches
SELECT ts_headline('english', body, to_tsquery('postgresql'), 'MaxWords=35, MinWords=15')
FROM articles WHERE search_vector @@ to_tsquery('postgresql');

-- Phrase search
SELECT title FROM articles
WHERE search_vector @@ phraseto_tsquery('english', 'full text search');

Transactions & Locking

-- Transaction basics
BEGIN;
  UPDATE accounts SET balance = balance - 100 WHERE id = 1;
  UPDATE accounts SET balance = balance + 100 WHERE id = 2;
COMMIT;  -- or ROLLBACK;

-- Savepoints
BEGIN;
  INSERT INTO orders (user_id, total) VALUES (1, 200);
  SAVEPOINT after_insert;
  UPDATE inventory SET stock = stock - 1 WHERE product_id = 5;
  -- if something fails:
  ROLLBACK TO after_insert;
COMMIT;

-- Isolation levels
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;   -- default
SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;
SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;

-- Advisory locks (application-level)
SELECT pg_advisory_lock(12345);    -- blocks
SELECT pg_try_advisory_lock(12345); -- returns bool, non-blocking
SELECT pg_advisory_unlock(12345);

-- Row-level locking
SELECT * FROM jobs WHERE status = 'pending' LIMIT 1 FOR UPDATE SKIP LOCKED;

Useful Extras

-- UPSERT (INSERT ... ON CONFLICT)
INSERT INTO users (email, name) VALUES ('a@b.com', 'Alice')
ON CONFLICT (email) DO UPDATE SET name = EXCLUDED.name, updated_at = NOW();

-- RETURNING — get inserted/updated data
INSERT INTO users (email) VALUES ('b@c.com') RETURNING id, created_at;
UPDATE orders SET status = 'shipped' WHERE id = 42 RETURNING *;

-- Arrays
SELECT ARRAY[1, 2, 3] AS nums;
SELECT '{apple,banana}'::text[] AS fruits;
SELECT 5 = ANY(ARRAY[1, 3, 5]) AS found;           -- true
SELECT ARRAY_AGG(name ORDER BY name) FROM users;
SELECT UNNEST(tags) AS tag FROM articles;

-- Generate series
SELECT generate_series(1, 10) AS n;
SELECT generate_series('2024-01-01'::date, '2024-12-31', '1 month') AS month;

PostgreSQL

Interview Questions

PostgreSQL Interview Questions Q: What is the difference between CHAR, VARCHAR, and TEXT? CHAR(n) is fixed-length, padded with spaces. VARCHAR(n) is variable-le…

PostgreSQL Interview Questions

Q: What is the difference between CHAR, VARCHAR, and TEXT?

CHAR(n) is fixed-length, padded with spaces. VARCHAR(n) is variable-length with a max. TEXT is unlimited variable-length. In PostgreSQL, all three are stored the same way internally — there is no performance difference. Prefer TEXT unless you have a specific reason to enforce a length constraint.

Q: What are ACID properties?

Atomicity — all operations in a transaction succeed or all are rolled back
Consistency — database moves from one valid state to another; constraints are never violated
Isolation — concurrent transactions don't see each other's uncommitted changes
Durability — committed transactions survive crashes (WAL ensures this)

Q: INNER JOIN vs LEFT JOIN?

INNER JOIN returns only rows where the condition matches in both tables. LEFT JOIN returns all rows from the left table and matching rows from the right — NULLs fill unmatched right-side columns. Use LEFT JOIN when you need to keep all records from one side regardless of whether a match exists.

Q: What is an index and when should you add one?

An index is a separate data structure (usually a B-tree) that speeds up lookups at the cost of extra storage and slower writes. Add indexes on columns used in WHERE, JOIN ON, and ORDER BY clauses. Avoid over-indexing — each index slows INSERT/UPDATE/DELETE and wastes space. Use EXPLAIN ANALYZE to confirm an index is being used.

Q: What is MVCC?

Multi-Version Concurrency Control. PostgreSQL never overwrites rows in place — instead each UPDATE creates a new row version (tuple) and marks the old one as dead. Readers see a consistent snapshot without locking writers. Dead tuples are reclaimed by VACUUM. This enables high concurrency but requires regular vacuuming to prevent bloat.

Q: What is a covering index?

A covering index includes all columns needed by a query, enabling an Index Only Scan — PostgreSQL never touches the heap (main table). Use the INCLUDE clause:

CREATE INDEX idx_orders_user ON orders(user_id) INCLUDE (total, status);

Q: What is the N+1 query problem?

When you fetch N records then make one extra query per record (e.g., fetch 100 users, then query each user's orders individually = 101 queries). Fix by using a JOIN or eager loading to fetch all data in one query. In ORMs use .include() / .with() or DataLoader batching.

Q: What is the difference between WHERE and HAVING?

WHERE filters rows before aggregation. HAVING filters groups after GROUP BY aggregation. You cannot use aggregate functions in WHERE, but you can in HAVING.

Q: How does PostgreSQL handle NULL?

NULL represents unknown/missing data. NULL != NULL — comparisons with NULL always return NULL (unknown), not true/false. Use IS NULL / IS NOT NULL to check for NULL. Aggregate functions like COUNT(*) vs COUNT(col) differ: COUNT(col) ignores NULLs. Use COALESCE(val, default) to substitute a fallback.

Q: Sequence vs SERIAL vs IDENTITY?

SERIAL is a shorthand that creates a sequence + sets a DEFAULT. IDENTITY (SQL standard, PostgreSQL 10+) is more explicit and portable. Both auto-increment. Use GENERATED ALWAYS AS IDENTITY for new tables. Use uuid_generate_v4() or gen_random_uuid() for UUID primary keys in distributed systems.

Q: What is connection pooling and why is it needed?

PostgreSQL spawns a new process per connection — connections are expensive (~5MB RAM each). Connection pooling (PgBouncer, Supabase Pooler) maintains a pool of persistent backend connections and multiplexes many app connections over them. Essential for serverless/edge environments where hundreds of short-lived connections would overwhelm the DB.

PostgreSQL notes for developers

Queries & Joins

Queries & Joins

Basic SELECT

JOIN Types

Subqueries

CTEs (Common Table Expressions)

Useful Functions

Indexes & Performance

Indexes & Performance

Index Types

Creating Indexes

EXPLAIN ANALYZE

Query Optimization Tips

Advanced Features

Advanced Features

Window Functions

JSONB

Full-Text Search

Transactions & Locking

Useful Extras

Interview Questions

PostgreSQL Interview Questions

Q: What is the difference between CHAR, VARCHAR, and TEXT?

Q: What are ACID properties?

Q: INNER JOIN vs LEFT JOIN?

Q: What is an index and when should you add one?

Q: What is MVCC?

Q: What is a covering index?

Q: What is the N+1 query problem?

Q: What is the difference between WHERE and HAVING?

Q: How does PostgreSQL handle NULL?

Q: Sequence vs SERIAL vs IDENTITY?

Q: What is connection pooling and why is it needed?

Keep your PostgreSQL knowledge sharp.