Skip to content

Instantly share code, notes, and snippets.

@anhiga
Last active August 1, 2017 14:52
Show Gist options
  • Select an option

  • Save anhiga/34bbb5e8df062a159ca17d90859dfe69 to your computer and use it in GitHub Desktop.

Select an option

Save anhiga/34bbb5e8df062a159ca17d90859dfe69 to your computer and use it in GitHub Desktop.
Asteroid and comet DB
import numpy as np
import os
# Packed structured dtype describing one asteroid record: 117 fields,
# 835 bytes.  The groups below are concatenated in on-disk order, so the
# sequence of names fixes every field's byte offset -- do not reorder.
dtype = np.dtype(
    # 4-byte integers
    [(name, np.int32) for name in ("NO", "NOBS", "OBSFRST", "OBSLAST")]
    # 8-byte floats
    + [
        (name, np.float64)
        for name in (
            "EPOCH", "CALEPO", "MA", "W", "OM", "IN", "EC", "A", "QR",
            "TP", "TPCAL", "TPFRAC", "SOLDAT",
        )
    ]
    # SRC1 .. SRC45, also 8-byte floats
    + [("SRC%d" % idx, np.float64) for idx in range(1, 46)]
    # 1-byte integers
    + [
        (name, np.int8)
        for name in (
            "PRELTV", "SPHMX3", "SPHMX5", "JGSEP",
            "TWOBOD", "NSATS", "UPARM", "LSRC",
        )
    ]
    # 2-byte integers
    + [(name, np.int16) for name in ("NDEL", "NDOP")]
    # 4-byte floats
    + [
        (name, np.float32)
        for name in (
            "H", "G", "A1", "A2", "A3", "R0", "ALN", "NM", "NN", "NK",
            "LGK", "RHO", "AMRAT", "ALF", "DEL", "SPHLM3", "SPHLM5",
            "RP", "GM", "RAD", "EXTNT1", "EXTNT2", "EXTNT3", "MOID",
            "ALBEDO", "BVCI", "UBCI", "IRCI",
            "RMSW", "RMSU", "RMSN", "RMSNT", "RMSH",
        )
    ]
    # Fixed-width byte strings (each has its own width)
    + [
        ("EQUNOX", "|S4"),
        ("PENAM", "|S6"),
        ("SBNAM", "|S12"),
        ("SPTYPT", "|S5"),
        ("SPTYPS", "|S5"),
        ("DARC", "|S9"),
        ("COMNT1", "|S41"),
        ("COMNT2", "|S80"),
        ("DESIG", "|S13"),
        ("ASTEST", "|S8"),
        ("IREF", "|S10"),
        ("ASTNAM", "|S18"),
    ]
)
# Load every asteroid record into the structured array `data`.
# The file is opened in a `with` block so the handle is closed as soon as
# the read finishes (the name `f` stays bound, but to a closed file).
# NOTE(review): the "_le" suffix presumably marks a little-endian file,
# while `dtype` uses native-endian NumPy types -- confirm before running
# this on a big-endian host.
with open("dast5_le.dat", "rb") as f:
    # Skip the first 835 bytes, i.e. exactly one record of `dtype`
    # (dtype.itemsize == 835); presumably that slot holds the file header.
    f.seek(835, os.SEEK_SET)
    # No `count` is given, so everything up to end-of-file is read.
    data = np.fromfile(f, dtype=dtype)
import numpy as np
import os
# Packed structured dtype describing one comet record: 138 fields,
# 976 bytes.  The groups below are concatenated in on-disk order, so the
# sequence of names fixes every field's byte offset -- do not reorder.
dtype = np.dtype(
    # 4-byte integers
    [(name, np.int32) for name in ("NO", "NOBS", "OBSFRST", "OBSLAST")]
    # 8-byte floats
    + [
        (name, np.float64)
        for name in (
            "EPOCH", "CALEPO", "MA", "W", "OM", "IN", "EC", "A", "QR",
            "TP", "TPCAL", "TPFRAC", "SOLDAT",
        )
    ]
    # SRC1 .. SRC55, also 8-byte floats
    + [("SRC%d" % idx, np.float64) for idx in range(1, 56)]
    # 1-byte integers
    + [
        (name, np.int8)
        for name in (
            "PRELTV", "SPHMX3", "SPHMX5", "JGSEP",
            "TWOBOD", "NSATS", "UPARM", "LSRC",
        )
    ]
    # 2-byte integers
    + [
        (name, np.int16)
        for name in ("IPYR", "NDEL", "NDOP", "NOBSMT", "NOBSMN")
    ]
    # 4-byte floats (some field names contain spaces and parentheses)
    + [
        (name, np.float32)
        for name in (
            "H", "G",
            "M1 (MT)", "M2 (MN)", "K1 (MTSMT)", "K2 (MNSMT)", "PHCOF (MNP)",
            "A1", "A2", "A3", "DT", "R0", "ALN", "NM", "NN", "NK",
            "S0", "TCL", "RHO", "AMRAT",
            "AJ1", "AJ2", "ET1", "ET2", "DTH",
            "ALF", "DEL", "SPHLM3", "SPHLM5", "RP", "GM", "RAD",
            "EXTNT1", "EXTNT2", "EXTNT3", "MOID", "ALBEDO",
            "RMSW", "RMSU", "RMSN", "RMSNT", "RMSMT", "RMSMN",
        )
    ]
    # Fixed-width byte strings (each has its own width)
    + [
        ("EQUNOX", "|S4"),
        ("PENAM", "|S6"),
        ("SBNAM", "|S12"),
        ("DARC", "|S9"),
        ("COMNT3", "|S49"),
        ("COMNT2", "|S80"),
        ("DESIG", "|S13"),
        ("COMEST", "|S14"),
        ("IREF", "|S10"),
        ("COMNAM", "|S29"),
    ]
)
# Load every comet record into the structured array `data`.
# The file is opened in a `with` block so the handle is closed as soon as
# the read finishes (the name `f` stays bound, but to a closed file).
# NOTE(review): the "_le" suffix presumably marks a little-endian file,
# while `dtype` uses native-endian NumPy types -- confirm before running
# this on a big-endian host.
with open("dcom5_le.dat", "rb") as f:
    # Skip the first 976 bytes, i.e. exactly one record of `dtype`
    # (dtype.itemsize == 976); presumably that slot holds the file header.
    f.seek(976, os.SEEK_SET)
    # No `count` is given, so everything up to end-of-file is read.
    data = np.fromfile(f, dtype=dtype)
import numpy as np
import os
# Packed layout of the header read from the start of the asteroid file:
# 12 fields, 86 bytes.  Field order fixes the byte offsets.
ast_dtype = np.dtype(
    [("IBIAS1", np.int32)]
    # BEGINP1..3 then ENDPT1..3, each an 8-byte string
    + [("BEGINP%d" % idx, "|S8") for idx in (1, 2, 3)]
    + [("ENDPT%d" % idx, "|S8") for idx in (1, 2, 3)]
    + [
        ("CALDATE", "|S19"),
        ("JDDATE", np.float64),
        ("FTYP", "|S1"),
        ("BYTE2A", np.int16),
        ("IBIAS0", np.int32),
    ]
)
# Read the single header record (count=1) from the start of the asteroid
# file.  The `with` block closes the handle once the read is done, instead
# of leaving it open until `f` is rebound or garbage-collected.
with open("dast5_le.dat", "rb") as f:
    ast_header = np.fromfile(f, dtype=ast_dtype, count=1)
# Packed layout of the header read from the start of the comet file:
# 11 fields, 82 bytes.  Field order fixes the byte offsets.
com_dtype = np.dtype(
    [("IBIAS2", np.int32)]
    # BEGINP1..3 then ENDPT1..3, each an 8-byte string
    + [("BEGINP%d" % idx, "|S8") for idx in (1, 2, 3)]
    + [("ENDPT%d" % idx, "|S8") for idx in (1, 2, 3)]
    + [
        ("CALDATE", "|S19"),
        ("JDDATE", np.float64),
        ("FTYP", "|S1"),
        ("BYTE2C", np.int16),
    ]
)
# Read the single header record (count=1) from the start of the comet
# file.  The `with` block closes the handle once the read is done, instead
# of leaving it open for the rest of the script.
with open("dcom5_le.dat", "rb") as f:
    com_header = np.fromfile(f, dtype=com_dtype, count=1)
@astrojuanlu
Copy link

I loaded the data into a pandas DataFrame and this is the number of unique elements of each column of the asteroid database:

[('NO', 736584),
 ('W', 736584),
 ('OM', 736584),
 ('IN', 736584),
 ('A', 736584),
 ('QR', 736584),
 ('ASTNAM', 736584),
 ('TP', 736579),
 ('MA', 736578),
 ('TPFRAC', 736578),
 ('EC', 736576),
 ('TPCAL', 736567),
 ('DESIG', 736253),
 ('SRC1', 736227),
 ('SRC2', 736227),
 ('SRC3', 736227),
 ('SRC4', 736227),
 ('SRC5', 736227),
 ('SRC6', 736227),
 ('SRC7', 736227),
 ('SRC8', 736227),
 ('SRC9', 736227),
 ('SRC10', 736227),
 ('SRC11', 736227),
 ('SRC12', 736227),
 ('SRC13', 736227),
 ('SRC14', 736227),
 ('SRC15', 736227),
 ('SRC16', 736227),
 ('SRC17', 736227),
 ('SRC18', 736227),
 ('SRC19', 736227),
 ('SRC20', 736227),
 ('SRC21', 736227),
 ('SOLDAT', 381103),
 ('MOID', 279673),
 ('RMSU', 70480),
 ('RMSN', 66890),
 ('RMSW', 58099),
 ('RMSH', 43883),
 ('RAD', 16609),
 ('RP', 13147),
 ('OBSFRST', 12982),
 ('H', 7958),
 ('EPOCH', 6585),
 ('CALEPO', 6585),
 ('OBSLAST', 5446),
 ('COMNT2', 4437),
 ('NOBS', 2416),
 ('ALBEDO', 1058),
 ('DARC', 917),
 ('IREF', 379),
 ('UBCI', 340),
 ('BVCI', 308),
 ('COMNT1', 250),
 ('SPTYPT', 132),
 ('SRC22', 73),
 ('SRC23', 73),
 ('SRC24', 73),
 ('SRC25', 73),
 ('SRC26', 73),
 ('SRC27', 73),
 ('SRC28', 73),
 ('A2', 73),
 ('G', 48),
 ('SPTYPS', 35),
 ('NDEL', 24),
 ('NDOP', 18),
 ('EXTNT1', 17),
 ('EXTNT2', 17),
 ('EXTNT3', 17),
 ('GM', 13),
 ('UPARM', 11),
 ('SRC29', 7),
 ('SRC30', 7),
 ('SRC31', 7),
 ('SRC32', 7),
 ('SRC33', 7),
 ('SRC34', 7),
 ('SRC35', 7),
 ('SRC36', 7),
 ('A1', 7),
 ('LSRC', 5),
 ('NM', 4),
 ('PENAM', 4),
 ('SBNAM', 4),
 ('ASTEST', 4),
 ('R0', 3),
 ('ALN', 3),
 ('SRC37', 2),
 ('SRC38', 2),
 ('SRC39', 2),
 ('SRC40', 2),
 ('SRC41', 2),
 ('SRC42', 2),
 ('SRC43', 2),
 ('SRC44', 2),
 ('SRC45', 2),
 ('TWOBOD', 2),
 ('A3', 2),
 ('NN', 2),
 ('NK', 2),
 ('AMRAT', 2),
 ('IRCI', 2),
 ('PRELTV', 1),
 ('SPHMX3', 1),
 ('SPHMX5', 1),
 ('JGSEP', 1),
 ('NSATS', 1),
 ('LGK', 1),
 ('RHO', 1),
 ('ALF', 1),
 ('DEL', 1),
 ('SPHLM3', 1),
 ('SPHLM5', 1),
 ('RMSNT', 1),
 ('EQUNOX', 1)]

To make the DataFrame more usable, some cleaning could be done:

  • The columns with one unique value are all pointless (except, perhaps "EQUNOX")
  • Some others contain values that may be sentinels standing in for missing data (NULL)
  • The character columns contain trailing whitespace

Anyway, the database is not that big:

>>> df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 736584 entries, 0 to 736583
Columns: 117 entries, NO to ASTNAM
dtypes: float32(33), float64(58), int16(2), int32(4), int8(8), object(12)
memory usage: 505.8+ MB

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment