mirror of
https://github.com/aljazceru/turso.git
synced 2025-12-17 08:34:19 +01:00
add gen-bigass-database.py
ability to create an equivalent big schema so that the issues reported by Preston using that larger schema can be reproduced and fixed.
This commit is contained in:
407
testing/gen-bigass-database.py
Normal file
407
testing/gen-bigass-database.py
Normal file
@@ -0,0 +1,407 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import random
|
||||||
|
import sqlite3
|
||||||
|
|
||||||
|
from faker import Faker
|
||||||
|
|
||||||
|
conn = sqlite3.connect("testing/testing-bigass.db")
|
||||||
|
cursor = conn.cursor()
|
||||||
|
|
||||||
|
fake = Faker()
|
||||||
|
|
||||||
|
cursor.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS users (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
first_name TEXT,
|
||||||
|
last_name TEXT,
|
||||||
|
email TEXT,
|
||||||
|
phone_number TEXT,
|
||||||
|
address TEXT,
|
||||||
|
city TEXT,
|
||||||
|
state TEXT,
|
||||||
|
zipcode TEXT,
|
||||||
|
age INTEGER,
|
||||||
|
created_at TIMESTAMP,
|
||||||
|
updated_at TIMESTAMP
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
|
||||||
|
cursor.execute(
|
||||||
|
"""
|
||||||
|
CREATE TABLE products (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
name TEXT,
|
||||||
|
price REAL
|
||||||
|
);
|
||||||
|
""",
|
||||||
|
[],
|
||||||
|
)
|
||||||
|
|
||||||
|
# specific products we already test for
|
||||||
|
cursor.execute("INSERT INTO products VALUES(1,'hat',79.0);")
|
||||||
|
cursor.execute("INSERT INTO products VALUES(2,'cap',82.0);")
|
||||||
|
cursor.execute("INSERT INTO products VALUES(3,'shirt',18.0);")
|
||||||
|
cursor.execute("INSERT INTO products VALUES(4,'sweater',25.0);")
|
||||||
|
cursor.execute("INSERT INTO products VALUES(5,'sweatshirt',74.0);")
|
||||||
|
cursor.execute("INSERT INTO products VALUES(6,'shorts',70.0);")
|
||||||
|
cursor.execute("INSERT INTO products VALUES(7,'jeans',78.0);")
|
||||||
|
cursor.execute("INSERT INTO products VALUES(8,'sneakers',82.0);")
|
||||||
|
cursor.execute("INSERT INTO products VALUES(9,'boots',1.0);")
|
||||||
|
cursor.execute("INSERT INTO products VALUES(10,'coat',33.0);")
|
||||||
|
cursor.execute("INSERT INTO products VALUES(11,'accessories',81.0);")
|
||||||
|
|
||||||
|
for i in range(12, 12001):
|
||||||
|
name = fake.word().title()
|
||||||
|
price = round(random.uniform(5.0, 999.99), 2)
|
||||||
|
cursor.execute("INSERT INTO products (id, name, price) VALUES (?, ?, ?)", [i, name, price])
|
||||||
|
|
||||||
|
cursor.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS orders (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
user_id INTEGER,
|
||||||
|
order_date TIMESTAMP,
|
||||||
|
total_amount REAL,
|
||||||
|
status TEXT,
|
||||||
|
shipping_address TEXT,
|
||||||
|
shipping_city TEXT,
|
||||||
|
shipping_state TEXT,
|
||||||
|
shipping_zip TEXT,
|
||||||
|
payment_method TEXT,
|
||||||
|
tracking_number TEXT,
|
||||||
|
notes TEXT,
|
||||||
|
FOREIGN KEY (user_id) REFERENCES users(id)
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
|
||||||
|
cursor.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS order_items (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
order_id INTEGER,
|
||||||
|
product_id INTEGER,
|
||||||
|
quantity INTEGER,
|
||||||
|
unit_price REAL,
|
||||||
|
discount REAL,
|
||||||
|
tax REAL,
|
||||||
|
total_price REAL,
|
||||||
|
FOREIGN KEY (order_id) REFERENCES orders(id),
|
||||||
|
FOREIGN KEY (product_id) REFERENCES products(id)
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
|
||||||
|
cursor.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS reviews (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
product_id INTEGER,
|
||||||
|
user_id INTEGER,
|
||||||
|
rating INTEGER,
|
||||||
|
title TEXT,
|
||||||
|
comment TEXT,
|
||||||
|
helpful_count INTEGER,
|
||||||
|
verified_purchase BOOLEAN,
|
||||||
|
review_date TIMESTAMP,
|
||||||
|
FOREIGN KEY (product_id) REFERENCES products(id),
|
||||||
|
FOREIGN KEY (user_id) REFERENCES users(id)
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
|
||||||
|
cursor.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS inventory_transactions (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
product_id INTEGER,
|
||||||
|
transaction_type TEXT,
|
||||||
|
quantity INTEGER,
|
||||||
|
previous_quantity INTEGER,
|
||||||
|
new_quantity INTEGER,
|
||||||
|
transaction_date TIMESTAMP,
|
||||||
|
reference_type TEXT,
|
||||||
|
reference_id INTEGER,
|
||||||
|
notes TEXT,
|
||||||
|
performed_by TEXT,
|
||||||
|
FOREIGN KEY (product_id) REFERENCES products(id)
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
|
||||||
|
cursor.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS customer_support_tickets (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
user_id INTEGER,
|
||||||
|
order_id INTEGER,
|
||||||
|
ticket_number TEXT,
|
||||||
|
category TEXT,
|
||||||
|
priority TEXT,
|
||||||
|
status TEXT,
|
||||||
|
subject TEXT,
|
||||||
|
description TEXT,
|
||||||
|
created_at TIMESTAMP,
|
||||||
|
updated_at TIMESTAMP,
|
||||||
|
resolved_at TIMESTAMP,
|
||||||
|
assigned_to TEXT,
|
||||||
|
resolution_notes TEXT,
|
||||||
|
FOREIGN KEY (user_id) REFERENCES users(id),
|
||||||
|
FOREIGN KEY (order_id) REFERENCES orders(id)
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
|
||||||
|
print("Generating users...")
|
||||||
|
users_data = []
|
||||||
|
for i in range(15000):
|
||||||
|
if i % 1000 == 0:
|
||||||
|
print(f" Generated {i} users...")
|
||||||
|
|
||||||
|
first_name = fake.first_name()
|
||||||
|
last_name = fake.last_name()
|
||||||
|
email = fake.email()
|
||||||
|
phone_number = fake.phone_number()
|
||||||
|
address = fake.street_address()
|
||||||
|
city = fake.city()
|
||||||
|
state = fake.state_abbr()
|
||||||
|
zipcode = fake.zipcode()
|
||||||
|
age = fake.random_int(min=18, max=85)
|
||||||
|
created_at = fake.date_time_between(start_date="-3y", end_date="now")
|
||||||
|
updated_at = fake.date_time_between(start_date=created_at, end_date="now")
|
||||||
|
|
||||||
|
users_data.append(
|
||||||
|
(first_name, last_name, email, phone_number, address, city, state, zipcode, age, created_at, updated_at)
|
||||||
|
)
|
||||||
|
|
||||||
|
cursor.executemany(
|
||||||
|
"""
|
||||||
|
INSERT INTO users (first_name, last_name, email, phone_number, address,
|
||||||
|
city, state, zipcode, age, created_at, updated_at)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
""",
|
||||||
|
users_data,
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Generating orders...")
|
||||||
|
order_statuses = ["pending", "processing", "shipped", "delivered", "cancelled", "refunded"]
|
||||||
|
payment_methods = ["credit_card", "debit_card", "paypal", "apple_pay", "google_pay", "bank_transfer"]
|
||||||
|
|
||||||
|
orders_data = []
|
||||||
|
for i in range(20000):
|
||||||
|
if i % 2000 == 0:
|
||||||
|
print(f" Generated {i} orders...")
|
||||||
|
|
||||||
|
user_id = random.randint(1, 15000)
|
||||||
|
order_date = fake.date_time_between(start_date="-1y", end_date="now")
|
||||||
|
total_amount = round(random.uniform(10.0, 5000.0), 2)
|
||||||
|
status = random.choice(order_statuses)
|
||||||
|
shipping_address = fake.street_address()
|
||||||
|
shipping_city = fake.city()
|
||||||
|
shipping_state = fake.state_abbr()
|
||||||
|
shipping_zip = fake.zipcode()
|
||||||
|
payment_method = random.choice(payment_methods)
|
||||||
|
tracking_number = fake.ean13() if status in ["shipped", "delivered"] else None
|
||||||
|
notes = fake.text(max_nb_chars=100) if random.random() < 0.3 else None
|
||||||
|
|
||||||
|
orders_data.append(
|
||||||
|
(
|
||||||
|
user_id,
|
||||||
|
order_date,
|
||||||
|
total_amount,
|
||||||
|
status,
|
||||||
|
shipping_address,
|
||||||
|
shipping_city,
|
||||||
|
shipping_state,
|
||||||
|
shipping_zip,
|
||||||
|
payment_method,
|
||||||
|
tracking_number,
|
||||||
|
notes,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
cursor.executemany(
|
||||||
|
"""
|
||||||
|
INSERT INTO orders (user_id, order_date, total_amount, status, shipping_address,
|
||||||
|
shipping_city, shipping_state, shipping_zip, payment_method,
|
||||||
|
tracking_number, notes)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
""",
|
||||||
|
orders_data,
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Generating order items...")
|
||||||
|
order_items_data = []
|
||||||
|
for order_id in range(1, 20001):
|
||||||
|
if order_id % 2000 == 0:
|
||||||
|
print(f" Generated items for {order_id} orders...")
|
||||||
|
|
||||||
|
num_items = random.randint(1, 8)
|
||||||
|
for _ in range(num_items):
|
||||||
|
product_id = random.randint(1, 12000)
|
||||||
|
quantity = random.randint(1, 5)
|
||||||
|
unit_price = round(random.uniform(0.99, 999.99), 2)
|
||||||
|
discount = round(random.uniform(0, 0.3) * unit_price, 2) if random.random() < 0.2 else 0
|
||||||
|
tax = round((unit_price - discount) * quantity * 0.08, 2)
|
||||||
|
total_price = round((unit_price - discount) * quantity + tax, 2)
|
||||||
|
|
||||||
|
order_items_data.append((order_id, product_id, quantity, unit_price, discount, tax, total_price))
|
||||||
|
|
||||||
|
cursor.executemany(
|
||||||
|
"""
|
||||||
|
INSERT INTO order_items (order_id, product_id, quantity, unit_price,
|
||||||
|
discount, tax, total_price)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||||
|
""",
|
||||||
|
order_items_data,
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Generating reviews...")
|
||||||
|
reviews_data = []
|
||||||
|
for i in range(25000):
|
||||||
|
if i % 2500 == 0:
|
||||||
|
print(f" Generated {i} reviews...")
|
||||||
|
|
||||||
|
product_id = random.randint(1, 12000)
|
||||||
|
user_id = random.randint(1, 15000)
|
||||||
|
rating = random.choices([1, 2, 3, 4, 5], weights=[5, 10, 15, 30, 40])[0]
|
||||||
|
title = fake.catch_phrase()
|
||||||
|
comment = fake.text(max_nb_chars=500)
|
||||||
|
helpful_count = random.randint(0, 100)
|
||||||
|
verified_purchase = random.choice([0, 1])
|
||||||
|
review_date = fake.date_time_between(start_date="-1y", end_date="now")
|
||||||
|
|
||||||
|
reviews_data.append((product_id, user_id, rating, title, comment, helpful_count, verified_purchase, review_date))
|
||||||
|
|
||||||
|
cursor.executemany(
|
||||||
|
"""
|
||||||
|
INSERT INTO reviews (product_id, user_id, rating, title, comment,
|
||||||
|
helpful_count, verified_purchase, review_date)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
""",
|
||||||
|
reviews_data,
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Generating inventory transactions...")
|
||||||
|
transaction_types = ["purchase", "sale", "return", "adjustment", "transfer", "damage"]
|
||||||
|
reference_types = ["order", "return", "adjustment", "transfer", "manual"]
|
||||||
|
|
||||||
|
inventory_data = []
|
||||||
|
for i in range(18000):
|
||||||
|
if i % 2000 == 0:
|
||||||
|
print(f" Generated {i} inventory transactions...")
|
||||||
|
|
||||||
|
product_id = random.randint(1, 12000)
|
||||||
|
transaction_type = random.choice(transaction_types)
|
||||||
|
quantity = random.randint(1, 100)
|
||||||
|
previous_quantity = random.randint(0, 1000)
|
||||||
|
new_quantity = (
|
||||||
|
previous_quantity + quantity if transaction_type in ["purchase", "return"] else previous_quantity - quantity
|
||||||
|
)
|
||||||
|
new_quantity = max(0, new_quantity)
|
||||||
|
transaction_date = fake.date_time_between(start_date="-6m", end_date="now")
|
||||||
|
reference_type = random.choice(reference_types)
|
||||||
|
reference_id = random.randint(1, 20000) if reference_type == "order" else random.randint(1, 1000)
|
||||||
|
notes = fake.text(max_nb_chars=100) if random.random() < 0.3 else None
|
||||||
|
performed_by = fake.name()
|
||||||
|
|
||||||
|
inventory_data.append(
|
||||||
|
(
|
||||||
|
product_id,
|
||||||
|
transaction_type,
|
||||||
|
quantity,
|
||||||
|
previous_quantity,
|
||||||
|
new_quantity,
|
||||||
|
transaction_date,
|
||||||
|
reference_type,
|
||||||
|
reference_id,
|
||||||
|
notes,
|
||||||
|
performed_by,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
cursor.executemany(
|
||||||
|
"""
|
||||||
|
INSERT INTO inventory_transactions (product_id, transaction_type, quantity, previous_quantity,
|
||||||
|
new_quantity, transaction_date, reference_type, reference_id,
|
||||||
|
notes, performed_by)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
""",
|
||||||
|
inventory_data,
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Generating customer support tickets...")
|
||||||
|
ticket_categories = ["shipping", "product", "payment", "account", "return", "technical", "other"]
|
||||||
|
priorities = ["low", "medium", "high", "urgent"]
|
||||||
|
ticket_statuses = ["open", "in_progress", "waiting_customer", "resolved", "closed"]
|
||||||
|
|
||||||
|
tickets_data = []
|
||||||
|
for i in range(10000):
|
||||||
|
if i % 1000 == 0:
|
||||||
|
print(f" Generated {i} support tickets...")
|
||||||
|
|
||||||
|
user_id = random.randint(1, 15000)
|
||||||
|
order_id = random.randint(1, 20000) if random.random() < 0.7 else None
|
||||||
|
ticket_number = f"TICKET-{fake.random_int(min=100000, max=999999)}"
|
||||||
|
category = random.choice(ticket_categories)
|
||||||
|
priority = random.choice(priorities)
|
||||||
|
status = random.choice(ticket_statuses)
|
||||||
|
subject = fake.catch_phrase()
|
||||||
|
description = fake.text(max_nb_chars=1000)
|
||||||
|
created_at = fake.date_time_between(start_date="-6m", end_date="now")
|
||||||
|
updated_at = fake.date_time_between(start_date=created_at, end_date="now")
|
||||||
|
resolved_at = (
|
||||||
|
fake.date_time_between(start_date=updated_at, end_date="now") if status in ["resolved", "closed"] else None
|
||||||
|
)
|
||||||
|
assigned_to = fake.name() if status != "open" else None
|
||||||
|
resolution_notes = fake.text(max_nb_chars=500) if status in ["resolved", "closed"] else None
|
||||||
|
|
||||||
|
tickets_data.append(
|
||||||
|
(
|
||||||
|
user_id,
|
||||||
|
order_id,
|
||||||
|
ticket_number,
|
||||||
|
category,
|
||||||
|
priority,
|
||||||
|
status,
|
||||||
|
subject,
|
||||||
|
description,
|
||||||
|
created_at,
|
||||||
|
updated_at,
|
||||||
|
resolved_at,
|
||||||
|
assigned_to,
|
||||||
|
resolution_notes,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
cursor.executemany(
|
||||||
|
"""
|
||||||
|
INSERT INTO customer_support_tickets (user_id, order_id, ticket_number, category, priority,
|
||||||
|
status, subject, description, created_at, updated_at,
|
||||||
|
resolved_at, assigned_to, resolution_notes)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
""",
|
||||||
|
tickets_data,
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Creating indexes...")
|
||||||
|
cursor.execute("CREATE INDEX age_idx on users (age)")
|
||||||
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_orders_user_id ON orders(user_id)")
|
||||||
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_orders_status ON orders(status)")
|
||||||
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_order_items_order_id ON order_items(order_id)")
|
||||||
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_order_items_product_id ON order_items(product_id)")
|
||||||
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_reviews_product_id ON reviews(product_id)")
|
||||||
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_reviews_user_id ON reviews(user_id)")
|
||||||
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_inventory_product_id ON inventory_transactions(product_id)")
|
||||||
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_tickets_user_id ON customer_support_tickets(user_id)")
|
||||||
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_tickets_status ON customer_support_tickets(status)")
|
||||||
|
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
# Print summary statistics
|
||||||
|
print("\n=== Database Generation Complete ===")
|
||||||
|
print(f"Users: {cursor.execute('SELECT COUNT(*) FROM users').fetchone()[0]:,}")
|
||||||
|
print(f"Products: {cursor.execute('SELECT COUNT(*) FROM products').fetchone()[0]:,}")
|
||||||
|
print(f"Orders: {cursor.execute('SELECT COUNT(*) FROM orders').fetchone()[0]:,}")
|
||||||
|
print(f"Order Items: {cursor.execute('SELECT COUNT(*) FROM order_items').fetchone()[0]:,}")
|
||||||
|
print(f"Reviews: {cursor.execute('SELECT COUNT(*) FROM reviews').fetchone()[0]:,}")
|
||||||
|
print(f"Inventory Transactions: {cursor.execute('SELECT COUNT(*) FROM inventory_transactions').fetchone()[0]:,}")
|
||||||
|
print(f"Support Tickets: {cursor.execute('SELECT COUNT(*) FROM customer_support_tickets').fetchone()[0]:,}")
|
||||||
|
|
||||||
|
# Calculate approximate database size
|
||||||
|
cursor.execute("SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size()")
|
||||||
|
size_bytes = cursor.fetchone()[0]
|
||||||
|
print(f"\nApproximate database size: {size_bytes / (1024 * 1024):.2f} MB")
|
||||||
|
|
||||||
|
conn.close()
|
||||||
|
print("\nDatabase created successfully!")
|
||||||
Reference in New Issue
Block a user