# Script: read_CDB_ascii.py # Contact: George McCabe - NCAR (mccabe@ucar.edu) # Date: 2020-02-24 # Description: Read ASCII CalcDeltaB data, extracting one column of data, to be read by the MET tools # Usage: read_CDB_ascii.py # input_filename is the path to the file to read # column_to_read is the 1-based index of the column to read data (must be between 4 and 6) # convert_geographic can optionally be set to 'geog' to convert geomagnetic lat/lon to geographic # leave this argument out to skip conversion import sys import re import numpy as np import datetime as dt if len(sys.argv) < 3: print("ERROR: Incorrect arguments given. Must provide input filename and column of data to read") exit(1) input_filename = sys.argv[1] column_to_read = int(sys.argv[2]) # if last argument is 'geog' convert geomagnetic lat/lon to geographic lat/lon convert_to_geographic = False if len(sys.argv) == 4: if sys.argv[3] == "geog": convert_to_geographic = True else: print(f"Invalid value for convert_geographic argument: {sys.argv[3]}") print("Set argument to 'geog' to convert geomagnetic lat/lon to geogrpahic, " "or leave it out to skip conversion") exit(1) if convert_to_geographic: # import geopack module if converting lat/lon values from geopack import geopack as gp print("Converting geomagnetic lat/lon values to geographic lat/lon") if column_to_read < 4 or column_to_read > 6: print("ERROR: Column to read must be between 4 and 6 (columns start with 1)") exit(1) # adjust column number to align with zero based array column_to_read -= 1 # dictionary to hold numpy data array (met_data) and time/grid attributes (attrs) met_input = {} valid_time = None nx = None ny = None ntotal = None missing_value = None data_type = None header_name = None data_points = [] lat_points = [] lon_points = [] #################################################################################################### # read each line of input file with open(input_filename, 'r') as f: for idx, line in enumerate(f): # extract time info match = re.match(r'^# Date, time:(.*)', line) if match: time_items = match.group(1).split() time_formatted = f"{time_items[0]}{time_items[1].zfill(2)}{time_items[2].zfill(2)}_" +\ f"{time_items[3]}" valid_time = dt.datetime.strptime(time_formatted, "%Y%m%d_%H:%M:%S") print(f"Valid time: {valid_time}") # setup geopack converter by updating dipole based on valid time if convert_to_geographic: # use epoch time to find unix time (seconds) to pass to gp.recalc t0 = dt.datetime.utcfromtimestamp(0) ut = (valid_time-t0).total_seconds() ps = gp.recalc(ut) # extract nx/ny/ntotal match = re.match(r'^# Output data: field with (\d+)x(\d+)=(\d+) elements', line) if match: nx = int(match.group(1)) ny = int(match.group(2)) ntotal = int(match.group(3)) print(f"nx: {nx}, ny: {ny}, ntotal: {ntotal}") # extract missing value match = re.match(r'^# Missing data:(.*)$', line) if match: missing_value = match.group(1).strip() print(f"Missing value is {missing_value}") # extract data type match = re.match(r'^# Data type:(.*)$', line) if match: data_type = match.group(1).strip() print(f"Data type is {data_type}") # get header match = re.match(r'^#( mlon.*)', line) if match: headers = match.group(1).split() header_name = headers[column_to_read] # get data points if not line.startswith('#'): if missing_value is None: print("ERROR: Could not extract missing value from header") exit(1) # get magnetic lat/lon line_items = line.split() lon = float(line_items[0]) lat = float(line_items[1]) # if requested, convert geomagnetic lon (xmag) and lat (ymag) to geographic lat/lon if convert_to_geographic: # in the output, xgeo = lat, ygeo = lon? lat, lon, _ = gp.geomag(lon, lat, 0, -1) if lon < -180: lon += 360 if lat > 90: lat -= 180 lat_points.append(lat) lon_points.append(lon) point = line_items[column_to_read] if point == missing_value: point = -9999 else: point = float(point) data_points.append(point) # print(f"{idx}: lat: {latlon_point['lat']}, lon: {latlon_point['lon']}, data: {point}") # ensure correct number of data points were found if ntotal != len(data_points): print(f"ERROR: Total number of points found {len(data_points)} does not match header " f"total {ntotal}") exit(1) if valid_time is None: print("ERROR: Could not extract time information from header") exit(1) if nx is None or ny is None or ntotal is None: print("ERROR: Could not extract nx/ny/ntotal information from header") exit(1) print(f"Reading data from {header_name}") met_data = np.rot90(np.asarray(data_points, dtype='float').reshape(ny, nx).transpose()).copy() print("Data Shape: " + repr(met_data.shape)) print("Data Type: " + repr(met_data.dtype)) min_lat = min(lat_points) min_lon = min(lon_points) max_lat = max(lat_points) max_lon = max(lon_points) delta_lat = (max_lat - min_lat) / ny delta_lon = (max_lon - min_lon) / nx attrs = { 'valid': valid_time.strftime("%Y%m%d_%H%M%S"), 'init': valid_time.strftime("%Y%m%d_%H%M%S"), 'lead': '00', 'accum': '00', 'name': f'{data_type.split()[0]}', 'long_name': f'{data_type} {header_name}', 'level': 'Surface', 'units': 'UNKNOWN', 'grid': { 'name': f'{data_type.split()[1]}', 'type' : 'LatLon', 'lat_ll' : min_lat, 'lon_ll' : min_lon, 'delta_lat' : delta_lat, 'delta_lon' : delta_lon, 'Nlat' : ny, 'Nlon' : nx, } } print(attrs) met_input['met_data'] = met_data met_input['attrs'] = attrs