mirror of
https://github.com/aljazceru/turso.git
synced 2025-12-17 08:34:19 +01:00
ability to create an equivalent big schema so that the issues reported by Preston using that larger schema can be reproduced and fixed.
408 lines
14 KiB
Python
408 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
import random
|
|
import sqlite3
|
|
|
|
from faker import Faker
|
|
|
|
conn = sqlite3.connect("testing/testing-bigass.db")
|
|
cursor = conn.cursor()
|
|
|
|
fake = Faker()
|
|
|
|
cursor.execute("""
|
|
CREATE TABLE IF NOT EXISTS users (
|
|
id INTEGER PRIMARY KEY,
|
|
first_name TEXT,
|
|
last_name TEXT,
|
|
email TEXT,
|
|
phone_number TEXT,
|
|
address TEXT,
|
|
city TEXT,
|
|
state TEXT,
|
|
zipcode TEXT,
|
|
age INTEGER,
|
|
created_at TIMESTAMP,
|
|
updated_at TIMESTAMP
|
|
)
|
|
""")
|
|
|
|
cursor.execute(
|
|
"""
|
|
CREATE TABLE products (
|
|
id INTEGER PRIMARY KEY,
|
|
name TEXT,
|
|
price REAL
|
|
);
|
|
""",
|
|
[],
|
|
)
|
|
|
|
# specific products we already test for
|
|
cursor.execute("INSERT INTO products VALUES(1,'hat',79.0);")
|
|
cursor.execute("INSERT INTO products VALUES(2,'cap',82.0);")
|
|
cursor.execute("INSERT INTO products VALUES(3,'shirt',18.0);")
|
|
cursor.execute("INSERT INTO products VALUES(4,'sweater',25.0);")
|
|
cursor.execute("INSERT INTO products VALUES(5,'sweatshirt',74.0);")
|
|
cursor.execute("INSERT INTO products VALUES(6,'shorts',70.0);")
|
|
cursor.execute("INSERT INTO products VALUES(7,'jeans',78.0);")
|
|
cursor.execute("INSERT INTO products VALUES(8,'sneakers',82.0);")
|
|
cursor.execute("INSERT INTO products VALUES(9,'boots',1.0);")
|
|
cursor.execute("INSERT INTO products VALUES(10,'coat',33.0);")
|
|
cursor.execute("INSERT INTO products VALUES(11,'accessories',81.0);")
|
|
|
|
for i in range(12, 12001):
|
|
name = fake.word().title()
|
|
price = round(random.uniform(5.0, 999.99), 2)
|
|
cursor.execute("INSERT INTO products (id, name, price) VALUES (?, ?, ?)", [i, name, price])
|
|
|
|
cursor.execute("""
|
|
CREATE TABLE IF NOT EXISTS orders (
|
|
id INTEGER PRIMARY KEY,
|
|
user_id INTEGER,
|
|
order_date TIMESTAMP,
|
|
total_amount REAL,
|
|
status TEXT,
|
|
shipping_address TEXT,
|
|
shipping_city TEXT,
|
|
shipping_state TEXT,
|
|
shipping_zip TEXT,
|
|
payment_method TEXT,
|
|
tracking_number TEXT,
|
|
notes TEXT,
|
|
FOREIGN KEY (user_id) REFERENCES users(id)
|
|
)
|
|
""")
|
|
|
|
cursor.execute("""
|
|
CREATE TABLE IF NOT EXISTS order_items (
|
|
id INTEGER PRIMARY KEY,
|
|
order_id INTEGER,
|
|
product_id INTEGER,
|
|
quantity INTEGER,
|
|
unit_price REAL,
|
|
discount REAL,
|
|
tax REAL,
|
|
total_price REAL,
|
|
FOREIGN KEY (order_id) REFERENCES orders(id),
|
|
FOREIGN KEY (product_id) REFERENCES products(id)
|
|
)
|
|
""")
|
|
|
|
cursor.execute("""
|
|
CREATE TABLE IF NOT EXISTS reviews (
|
|
id INTEGER PRIMARY KEY,
|
|
product_id INTEGER,
|
|
user_id INTEGER,
|
|
rating INTEGER,
|
|
title TEXT,
|
|
comment TEXT,
|
|
helpful_count INTEGER,
|
|
verified_purchase BOOLEAN,
|
|
review_date TIMESTAMP,
|
|
FOREIGN KEY (product_id) REFERENCES products(id),
|
|
FOREIGN KEY (user_id) REFERENCES users(id)
|
|
)
|
|
""")
|
|
|
|
cursor.execute("""
|
|
CREATE TABLE IF NOT EXISTS inventory_transactions (
|
|
id INTEGER PRIMARY KEY,
|
|
product_id INTEGER,
|
|
transaction_type TEXT,
|
|
quantity INTEGER,
|
|
previous_quantity INTEGER,
|
|
new_quantity INTEGER,
|
|
transaction_date TIMESTAMP,
|
|
reference_type TEXT,
|
|
reference_id INTEGER,
|
|
notes TEXT,
|
|
performed_by TEXT,
|
|
FOREIGN KEY (product_id) REFERENCES products(id)
|
|
)
|
|
""")
|
|
|
|
cursor.execute("""
|
|
CREATE TABLE IF NOT EXISTS customer_support_tickets (
|
|
id INTEGER PRIMARY KEY,
|
|
user_id INTEGER,
|
|
order_id INTEGER,
|
|
ticket_number TEXT,
|
|
category TEXT,
|
|
priority TEXT,
|
|
status TEXT,
|
|
subject TEXT,
|
|
description TEXT,
|
|
created_at TIMESTAMP,
|
|
updated_at TIMESTAMP,
|
|
resolved_at TIMESTAMP,
|
|
assigned_to TEXT,
|
|
resolution_notes TEXT,
|
|
FOREIGN KEY (user_id) REFERENCES users(id),
|
|
FOREIGN KEY (order_id) REFERENCES orders(id)
|
|
)
|
|
""")
|
|
|
|
print("Generating users...")
|
|
users_data = []
|
|
for i in range(15000):
|
|
if i % 1000 == 0:
|
|
print(f" Generated {i} users...")
|
|
|
|
first_name = fake.first_name()
|
|
last_name = fake.last_name()
|
|
email = fake.email()
|
|
phone_number = fake.phone_number()
|
|
address = fake.street_address()
|
|
city = fake.city()
|
|
state = fake.state_abbr()
|
|
zipcode = fake.zipcode()
|
|
age = fake.random_int(min=18, max=85)
|
|
created_at = fake.date_time_between(start_date="-3y", end_date="now")
|
|
updated_at = fake.date_time_between(start_date=created_at, end_date="now")
|
|
|
|
users_data.append(
|
|
(first_name, last_name, email, phone_number, address, city, state, zipcode, age, created_at, updated_at)
|
|
)
|
|
|
|
cursor.executemany(
|
|
"""
|
|
INSERT INTO users (first_name, last_name, email, phone_number, address,
|
|
city, state, zipcode, age, created_at, updated_at)
|
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
""",
|
|
users_data,
|
|
)
|
|
|
|
print("Generating orders...")
|
|
order_statuses = ["pending", "processing", "shipped", "delivered", "cancelled", "refunded"]
|
|
payment_methods = ["credit_card", "debit_card", "paypal", "apple_pay", "google_pay", "bank_transfer"]
|
|
|
|
orders_data = []
|
|
for i in range(20000):
|
|
if i % 2000 == 0:
|
|
print(f" Generated {i} orders...")
|
|
|
|
user_id = random.randint(1, 15000)
|
|
order_date = fake.date_time_between(start_date="-1y", end_date="now")
|
|
total_amount = round(random.uniform(10.0, 5000.0), 2)
|
|
status = random.choice(order_statuses)
|
|
shipping_address = fake.street_address()
|
|
shipping_city = fake.city()
|
|
shipping_state = fake.state_abbr()
|
|
shipping_zip = fake.zipcode()
|
|
payment_method = random.choice(payment_methods)
|
|
tracking_number = fake.ean13() if status in ["shipped", "delivered"] else None
|
|
notes = fake.text(max_nb_chars=100) if random.random() < 0.3 else None
|
|
|
|
orders_data.append(
|
|
(
|
|
user_id,
|
|
order_date,
|
|
total_amount,
|
|
status,
|
|
shipping_address,
|
|
shipping_city,
|
|
shipping_state,
|
|
shipping_zip,
|
|
payment_method,
|
|
tracking_number,
|
|
notes,
|
|
)
|
|
)
|
|
|
|
cursor.executemany(
|
|
"""
|
|
INSERT INTO orders (user_id, order_date, total_amount, status, shipping_address,
|
|
shipping_city, shipping_state, shipping_zip, payment_method,
|
|
tracking_number, notes)
|
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
""",
|
|
orders_data,
|
|
)
|
|
|
|
print("Generating order items...")
|
|
order_items_data = []
|
|
for order_id in range(1, 20001):
|
|
if order_id % 2000 == 0:
|
|
print(f" Generated items for {order_id} orders...")
|
|
|
|
num_items = random.randint(1, 8)
|
|
for _ in range(num_items):
|
|
product_id = random.randint(1, 12000)
|
|
quantity = random.randint(1, 5)
|
|
unit_price = round(random.uniform(0.99, 999.99), 2)
|
|
discount = round(random.uniform(0, 0.3) * unit_price, 2) if random.random() < 0.2 else 0
|
|
tax = round((unit_price - discount) * quantity * 0.08, 2)
|
|
total_price = round((unit_price - discount) * quantity + tax, 2)
|
|
|
|
order_items_data.append((order_id, product_id, quantity, unit_price, discount, tax, total_price))
|
|
|
|
cursor.executemany(
|
|
"""
|
|
INSERT INTO order_items (order_id, product_id, quantity, unit_price,
|
|
discount, tax, total_price)
|
|
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
""",
|
|
order_items_data,
|
|
)
|
|
|
|
print("Generating reviews...")
|
|
reviews_data = []
|
|
for i in range(25000):
|
|
if i % 2500 == 0:
|
|
print(f" Generated {i} reviews...")
|
|
|
|
product_id = random.randint(1, 12000)
|
|
user_id = random.randint(1, 15000)
|
|
rating = random.choices([1, 2, 3, 4, 5], weights=[5, 10, 15, 30, 40])[0]
|
|
title = fake.catch_phrase()
|
|
comment = fake.text(max_nb_chars=500)
|
|
helpful_count = random.randint(0, 100)
|
|
verified_purchase = random.choice([0, 1])
|
|
review_date = fake.date_time_between(start_date="-1y", end_date="now")
|
|
|
|
reviews_data.append((product_id, user_id, rating, title, comment, helpful_count, verified_purchase, review_date))
|
|
|
|
cursor.executemany(
|
|
"""
|
|
INSERT INTO reviews (product_id, user_id, rating, title, comment,
|
|
helpful_count, verified_purchase, review_date)
|
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
""",
|
|
reviews_data,
|
|
)
|
|
|
|
print("Generating inventory transactions...")
|
|
transaction_types = ["purchase", "sale", "return", "adjustment", "transfer", "damage"]
|
|
reference_types = ["order", "return", "adjustment", "transfer", "manual"]
|
|
|
|
inventory_data = []
|
|
for i in range(18000):
|
|
if i % 2000 == 0:
|
|
print(f" Generated {i} inventory transactions...")
|
|
|
|
product_id = random.randint(1, 12000)
|
|
transaction_type = random.choice(transaction_types)
|
|
quantity = random.randint(1, 100)
|
|
previous_quantity = random.randint(0, 1000)
|
|
new_quantity = (
|
|
previous_quantity + quantity if transaction_type in ["purchase", "return"] else previous_quantity - quantity
|
|
)
|
|
new_quantity = max(0, new_quantity)
|
|
transaction_date = fake.date_time_between(start_date="-6m", end_date="now")
|
|
reference_type = random.choice(reference_types)
|
|
reference_id = random.randint(1, 20000) if reference_type == "order" else random.randint(1, 1000)
|
|
notes = fake.text(max_nb_chars=100) if random.random() < 0.3 else None
|
|
performed_by = fake.name()
|
|
|
|
inventory_data.append(
|
|
(
|
|
product_id,
|
|
transaction_type,
|
|
quantity,
|
|
previous_quantity,
|
|
new_quantity,
|
|
transaction_date,
|
|
reference_type,
|
|
reference_id,
|
|
notes,
|
|
performed_by,
|
|
)
|
|
)
|
|
|
|
cursor.executemany(
|
|
"""
|
|
INSERT INTO inventory_transactions (product_id, transaction_type, quantity, previous_quantity,
|
|
new_quantity, transaction_date, reference_type, reference_id,
|
|
notes, performed_by)
|
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
""",
|
|
inventory_data,
|
|
)
|
|
|
|
print("Generating customer support tickets...")
|
|
ticket_categories = ["shipping", "product", "payment", "account", "return", "technical", "other"]
|
|
priorities = ["low", "medium", "high", "urgent"]
|
|
ticket_statuses = ["open", "in_progress", "waiting_customer", "resolved", "closed"]
|
|
|
|
tickets_data = []
|
|
for i in range(10000):
|
|
if i % 1000 == 0:
|
|
print(f" Generated {i} support tickets...")
|
|
|
|
user_id = random.randint(1, 15000)
|
|
order_id = random.randint(1, 20000) if random.random() < 0.7 else None
|
|
ticket_number = f"TICKET-{fake.random_int(min=100000, max=999999)}"
|
|
category = random.choice(ticket_categories)
|
|
priority = random.choice(priorities)
|
|
status = random.choice(ticket_statuses)
|
|
subject = fake.catch_phrase()
|
|
description = fake.text(max_nb_chars=1000)
|
|
created_at = fake.date_time_between(start_date="-6m", end_date="now")
|
|
updated_at = fake.date_time_between(start_date=created_at, end_date="now")
|
|
resolved_at = (
|
|
fake.date_time_between(start_date=updated_at, end_date="now") if status in ["resolved", "closed"] else None
|
|
)
|
|
assigned_to = fake.name() if status != "open" else None
|
|
resolution_notes = fake.text(max_nb_chars=500) if status in ["resolved", "closed"] else None
|
|
|
|
tickets_data.append(
|
|
(
|
|
user_id,
|
|
order_id,
|
|
ticket_number,
|
|
category,
|
|
priority,
|
|
status,
|
|
subject,
|
|
description,
|
|
created_at,
|
|
updated_at,
|
|
resolved_at,
|
|
assigned_to,
|
|
resolution_notes,
|
|
)
|
|
)
|
|
|
|
cursor.executemany(
|
|
"""
|
|
INSERT INTO customer_support_tickets (user_id, order_id, ticket_number, category, priority,
|
|
status, subject, description, created_at, updated_at,
|
|
resolved_at, assigned_to, resolution_notes)
|
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
""",
|
|
tickets_data,
|
|
)
|
|
|
|
print("Creating indexes...")
|
|
cursor.execute("CREATE INDEX age_idx on users (age)")
|
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_orders_user_id ON orders(user_id)")
|
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_orders_status ON orders(status)")
|
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_order_items_order_id ON order_items(order_id)")
|
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_order_items_product_id ON order_items(product_id)")
|
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_reviews_product_id ON reviews(product_id)")
|
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_reviews_user_id ON reviews(user_id)")
|
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_inventory_product_id ON inventory_transactions(product_id)")
|
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_tickets_user_id ON customer_support_tickets(user_id)")
|
|
cursor.execute("CREATE INDEX IF NOT EXISTS idx_tickets_status ON customer_support_tickets(status)")
|
|
|
|
conn.commit()
|
|
|
|
# Print summary statistics
|
|
print("\n=== Database Generation Complete ===")
|
|
print(f"Users: {cursor.execute('SELECT COUNT(*) FROM users').fetchone()[0]:,}")
|
|
print(f"Products: {cursor.execute('SELECT COUNT(*) FROM products').fetchone()[0]:,}")
|
|
print(f"Orders: {cursor.execute('SELECT COUNT(*) FROM orders').fetchone()[0]:,}")
|
|
print(f"Order Items: {cursor.execute('SELECT COUNT(*) FROM order_items').fetchone()[0]:,}")
|
|
print(f"Reviews: {cursor.execute('SELECT COUNT(*) FROM reviews').fetchone()[0]:,}")
|
|
print(f"Inventory Transactions: {cursor.execute('SELECT COUNT(*) FROM inventory_transactions').fetchone()[0]:,}")
|
|
print(f"Support Tickets: {cursor.execute('SELECT COUNT(*) FROM customer_support_tickets').fetchone()[0]:,}")
|
|
|
|
# Calculate approximate database size
|
|
cursor.execute("SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size()")
|
|
size_bytes = cursor.fetchone()[0]
|
|
print(f"\nApproximate database size: {size_bytes / (1024 * 1024):.2f} MB")
|
|
|
|
conn.close()
|
|
print("\nDatabase created successfully!")
|