In [1]:
%matplotlib inline
In [2]:
import pandas as pd
import matplotlib.pyplot as plt
In [3]:
# The data file is whitespace-delimited and is read without a header row, so
# the eleven column names are supplied here in file order.
data_file = "P753-line-magnetic-AWAGS_MAG_2010.dat"
columns = [
    "line", "levelFlag", "lineType", "Fiducial", "altitude",
    "FlightNumber", "gpsAltitude", "mag awagsLevelled", "mag tieLevelled",
    "longitude", "latitude"
]
# sep=r"\s+" splits fields on runs of whitespace. It is the documented
# replacement for delim_whitespace=True, which is deprecated in pandas 2.2.
df = pd.read_csv(data_file, sep=r"\s+", header=None, names=columns)
In [4]:
# Report the frame's row count, per-column dtypes and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5961492 entries, 0 to 5961491
Data columns (total 11 columns):
 #   Column             Dtype  
---  ------             -----  
 0   line               int64  
 1   levelFlag          int64  
 2   lineType           int64  
 3   Fiducial           int64  
 4   altitude           float64
 5   FlightNumber       int64  
 6   gpsAltitude        float64
 7   mag awagsLevelled  float64
 8   mag tieLevelled    float64
 9   longitude          float64
 10  latitude           float64
dtypes: float64(6), int64(5)
memory usage: 500.3 MB
In [5]:
# Number of rows recorded for each line id; value_counts() returns them
# sorted by descending count, with the line id as the index.
s = df["line"].value_counts()
In [6]:
# First ten entries of s, i.e. the ten line ids with the most rows.
s10 = s.head(10)
s10
Out[6]:
170090    21041
170110    20901
170160    20731
170070    20581
170021    20561
170050    20471
170140    20461
170040    20181
170120    20011
170130    19801
Name: line, dtype: int64
In [7]:
# Horizontal bar per line id in s10, bar length = row count. The ids are
# converted to strings so matplotlib treats them as category labels instead
# of positions on a numeric axis. They are read straight from the index
# because Series.iteritems() was removed in pandas 2.0.
plt.barh([str(line_id) for line_id in s10.index], s10.values)
Out[7]:
<BarContainer object of 10 artists>
In [8]:
# All line ids in the order of s (descending row count). Taken from the index
# directly: Series.iteritems() was removed in pandas 2.0.
lines = s.index.tolist()
# Every tenth id from that ordering.
selected_lines = lines[::10]
selected_lines
Out[8]:
[170090,
 170250,
 170180,
 100860,
 100310,
 100530,
 100430,
 101890,
 100150,
 100400,
 101020,
 100300,
 101981,
 101040,
 102510,
 100120,
 100281,
 100110,
 101440,
 101320,
 101250,
 102730,
 102100,
 102750,
 102070,
 102780,
 102160,
 103200,
 103040,
 103060,
 103000,
 103130,
 101181,
 101051,
 103310,
 100050,
 103370,
 100941,
 102302,
 102831,
 101090,
 101741,
 103571]
In [9]:
# Restrict the full table to rows whose line id is in selected_lines.
sdf = df[df["line"].isin(selected_lines)]
In [10]:
# Display the filtered subset; pandas abbreviates the rendering of long frames.
sdf
Out[10]:
line levelFlag lineType Fiducial altitude FlightNumber gpsAltitude mag awagsLevelled mag tieLevelled longitude latitude
45216 103571 2 2 675290 80.43 2 418.04 11.91287 56865.32 149.756172 -31.007809
45217 103571 2 2 675291 80.31 2 417.90 12.01157 56865.42 149.756100 -31.007824
45218 103571 2 2 675292 80.19 2 417.78 12.07512 56865.49 149.756029 -31.007839
45219 103571 2 2 675293 80.05 2 417.66 12.14257 56865.56 149.755958 -31.007854
45220 103571 2 2 675294 79.91 2 417.56 12.19062 56865.61 149.755887 -31.007865
... ... ... ... ... ... ... ... ... ... ... ...
5775313 102302 2 2 585816 77.60 53 419.82 115.25880 56941.15 150.177823 -30.540623
5775314 102302 2 2 585817 77.75 53 420.04 114.99190 56940.88 150.177896 -30.540621
5775315 102302 2 2 585818 77.93 53 420.27 114.71690 56940.60 150.177968 -30.540624
5775316 102302 2 2 585819 78.13 53 420.51 114.45770 56940.34 150.178041 -30.540622
5775317 102302 2 2 585820 78.34 53 420.76 114.19850 56940.08 150.178113 -30.540620

605403 rows × 11 columns

In [11]:
def scatter(df, col="altitude", total_area=True):
    """Scatter-plot the rows of ``df`` by position, coloured by ``col``.

    Latitude is drawn on the x-axis and longitude on the y-axis. The figure
    is shown immediately and nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``"latitude"``, ``"longitude"`` and the column named by
        ``col``.
    col : str, default "altitude"
        Column whose values set the marker colour.
    total_area : bool, default True
        If true, pin the axes to the fixed window latitude -31.1 to -29.7 and
        longitude 149.0 to 150.3, so every call uses the same extent
        regardless of which rows are passed. If false, matplotlib autoscales
        to the data.
    """
    fig, ax = plt.subplots(figsize=(10, 10))
    points = ax.scatter(df["latitude"], df["longitude"], c=df[col])

    # Label the axes explicitly: latitude on x is the transpose of the usual
    # map orientation, and the colour values are unreadable without a scale.
    ax.set_xlabel("latitude")
    ax.set_ylabel("longitude")
    ax.set_title(col)
    fig.colorbar(points, ax=ax, label=col)

    if total_area:
        ax.set_xlim(-31.1, -29.7)
        ax.set_ylim(149.0, 150.3)
    plt.show()
In [12]:
# Every row of the full table, coloured by the default column ("altitude").
scatter(df)
In [13]:
# Only the rows held in sdf, coloured by the default column ("altitude").
scatter(sdf)
In [14]:
# Every row of the full table, coloured by flight number.
scatter(df, col="FlightNumber")
In [15]:
# Only the rows held in sdf, coloured by flight number.
scatter(sdf, col="FlightNumber")
In [16]:
# Rows whose FlightNumber is 2.
# NOTE(review): this rebinds sdf, so any cell that reads sdf depends on which
# assignment was executed last.
sdf = df[df["FlightNumber"] == 2]
In [17]:
# Plot the rows currently held in sdf, coloured by altitude.
scatter(sdf)
In [18]:
# Rebind sdf to the rows whose FlightNumber is 3.
sdf = df[df["FlightNumber"] == 3]
In [19]:
# Plot the rows currently held in sdf, coloured by altitude.
scatter(sdf)
In [20]:
# Rebind sdf to the rows whose FlightNumber is 4.
sdf = df[df["FlightNumber"] == 4]
In [21]:
# Plot the rows currently held in sdf, coloured by altitude.
scatter(sdf)
In [22]:
# Rebind sdf to the rows whose FlightNumber is 24.
sdf = df[df["FlightNumber"] == 24]
In [23]:
# Plot the rows currently held in sdf, coloured by altitude.
scatter(sdf)
In [ ]: