mirror of
https://github.com/aljazceru/satshkd-vercel.git
synced 2025-12-17 05:04:24 +01:00
- Removed all references to HKD and satshkd from codebase - Updated README, package.json, documentation to focus on EUR/sats - Removed HKD-specific files (convert_btcticker.js, archive/hkd_historical) - Updated all titles from SatsHKD/HKDSAT to SatsEUR/EURSAT - Removed HKD image reference from sats.hbs - Updated Python scripts to use 'historical' instead of 'hkd_historical' - Added all 16 missing EU official languages with translations: * Bulgarian (bg), Croatian (hr), Czech (cs), Danish (da) * Estonian (et), Finnish (fi), Greek (el), Hungarian (hu) * Irish (ga), Latvian (lv), Lithuanian (lt), Maltese (mt) * Romanian (ro), Slovak (sk), Slovenian (sl), Swedish (sv) - Now supports all 24 EU official languages
36 lines
1.4 KiB
Python
36 lines
1.4 KiB
Python
# Import the pandas and json libraries
|
|
import pandas as pd
|
|
import json
|
|
|
|
def get_data_from_file(datafile, dedup_file="historical_dedup"):
    """Remove fully duplicated records from a JSON file and save the result.

    The JSON content of ``datafile`` is loaded into a pandas DataFrame, rows
    that exactly repeat an earlier row are dropped (the first occurrence is
    kept), a short report is printed, and the remaining rows are written to
    ``dedup_file`` as a JSON array of records.

    Args:
        datafile: Path of the JSON file to read.
        dedup_file: Path of the JSON file to write. Defaults to
            ``"historical_dedup"``, the location this script has always used.

    Returns:
        The deduplicated DataFrame that was written to ``dedup_file``.
    """
    # JSON text is UTF-8 by specification; do not depend on the locale default.
    with open(datafile, "r", encoding="utf-8") as f:
        data = json.load(f)

    df = pd.DataFrame(data)

    # Boolean mask: True for every row identical to a row that appeared earlier.
    duplicates = df.duplicated(keep="first")
    dedup_df = df[~duplicates]

    print("Original DataFrame contains {} rows".format(len(df)))
    print("Deduplicated DataFrame contains {} rows".format(len(dedup_df)))

    # An empty input (or records without a "date" field) has no "date" column;
    # report nothing instead of failing with KeyError before the file is saved.
    if "date" in df.columns:
        duplicated_dates = df.loc[duplicates, "date"].unique()
    else:
        duplicated_dates = []
    print("Dates that were duplicated and removed:")
    print(duplicated_dates)

    dedup_df.to_json(dedup_file, orient="records")
    return dedup_df
|
|
|
|
# Script entry point: deduplicate the "historical" data file when executed directly.
if __name__ == "__main__":
    get_data_from_file("historical")
|