import pandas as pd


s = pd.Series([2,-1,3,5])
s

0    2
1   -1
2    3
3    5
dtype: int64


import numpy as np
np.exp(s)

0      7.389056
1      0.367879
2     20.085537
3    148.413159
dtype: float64


s + [1000,2000,3000,4000]

0    1002
1    1999
2    3003
3    4005
dtype: int64


s + 1000

0    1002
1     999
2    1003
3    1005
dtype: int64


s < 0

0    False
1     True
2    False
3    False
dtype: bool


s2 = pd.Series([68, 83, 112, 68], index=["alice", "bob", "charles", "darwin"])
s2

alice       68
bob         83
charles    112
darwin      68
dtype: int64


s2["bob"]

83


# Integer-position lookup through plain [] on a string-labelled Series.
# NOTE(review): newer pandas releases reportedly deprecate this positional
# fallback in favour of the explicit s2.iloc[1] - confirm against the pandas
# version this notebook targets.
s2[1]

83


s2.loc["bob"]

83


s2.iloc[1]

83


s2.iloc[1:3]

bob         83
charles    112
dtype: int64


surprise = pd.Series([1000, 1001, 1002, 1003])
surprise

0    1000
1    1001
2    1002
3    1003
dtype: int64


surprise_slice = surprise[2:]
surprise_slice

2    1002
3    1003
dtype: int64


# surprise_slice kept the original labels 2 and 3, so there is no label 0:
# plain [] lookup raises KeyError instead of returning the first element.
try:
    surprise_slice[0]
except KeyError as e:
    print("키 에러:", e)

키 에러: 0


surprise_slice.iloc[0]

1002


weights = {"alice": 68, "bob": 83, "colin": 86, "darwin": 68}
s3 = pd.Series(weights)
s3

alice     68
bob       83
colin     86
darwin    68
dtype: int64


s4 = pd.Series(weights, index = ["colin", "alice"])
s4

colin    86
alice    68
dtype: int64


print(s2.keys())
print(s3.keys())

s2 + s3

Index(['alice', 'bob', 'charles', 'darwin'], dtype='object')
Index(['alice', 'bob', 'colin', 'darwin'], dtype='object')

alice      136.0
bob        166.0
charles      NaN
colin        NaN
darwin     136.0
dtype: float64


s5 = pd.Series([1000,1000,1000,1000])
print("s2 =", s2.values)
print("s5 =", s5.values)

s2 + s5

s2 = [ 68  83 112  68]
s5 = [1000 1000 1000 1000]

alice     NaN
bob       NaN
charles   NaN
darwin    NaN
0         NaN
1         NaN
2         NaN
3         NaN
dtype: float64


meaning = pd.Series(42, ["life", "universe", "everything"])
meaning

life          42
universe      42
everything    42
dtype: int64


s6 = pd.Series([83, 68], index=["bob", "alice"], name="weights")
s6

bob      83
alice    68
Name: weights, dtype: int64


%matplotlib inline
import matplotlib.pyplot as plt
temperatures = [4.4,5.1,6.1,6.2,6.1,6.1,5.7,5.2,4.7,4.1,3.9,3.5]
s7 = pd.Series(temperatures, name="Temperature")
s7.plot()
plt.show()


dates = pd.date_range('2016/10/29 5:30pm', periods=12, freq='H')
dates

DatetimeIndex(['2016-10-29 17:30:00', '2016-10-29 18:30:00',
               '2016-10-29 19:30:00', '2016-10-29 20:30:00',
               '2016-10-29 21:30:00', '2016-10-29 22:30:00',
               '2016-10-29 23:30:00', '2016-10-30 00:30:00',
               '2016-10-30 01:30:00', '2016-10-30 02:30:00',
               '2016-10-30 03:30:00', '2016-10-30 04:30:00'],
              dtype='datetime64[ns]', freq='H')


temp_series = pd.Series(temperatures, dates)
temp_series

2016-10-29 17:30:00    4.4
2016-10-29 18:30:00    5.1
2016-10-29 19:30:00    6.1
2016-10-29 20:30:00    6.2
2016-10-29 21:30:00    6.1
2016-10-29 22:30:00    6.1
2016-10-29 23:30:00    5.7
2016-10-30 00:30:00    5.2
2016-10-30 01:30:00    4.7
2016-10-30 02:30:00    4.1
2016-10-30 03:30:00    3.9
2016-10-30 04:30:00    3.5
Freq: H, dtype: float64


temp_series.plot(kind="bar")

plt.grid(True)
plt.show()


temp_series_freq_2H = temp_series.resample("2H")
temp_series_freq_2H

<pandas.core.resample.DatetimeIndexResampler object at 0x7fccdeb8cf28>


temp_series_freq_2H = temp_series_freq_2H.mean()


temp_series_freq_2H.plot(kind="bar")
plt.show()


temp_series_freq_2H = temp_series.resample("2H").min()
temp_series_freq_2H

2016-10-29 16:00:00    4.4
2016-10-29 18:00:00    5.1
2016-10-29 20:00:00    6.1
2016-10-29 22:00:00    5.7
2016-10-30 00:00:00    4.7
2016-10-30 02:00:00    3.9
2016-10-30 04:00:00    3.5
Freq: 2H, dtype: float64


temp_series_freq_2H = temp_series.resample("2H").apply(np.min)
temp_series_freq_2H

2016-10-29 16:00:00    4.4
2016-10-29 18:00:00    5.1
2016-10-29 20:00:00    6.1
2016-10-29 22:00:00    5.7
2016-10-30 00:00:00    4.7
2016-10-30 02:00:00    3.9
2016-10-30 04:00:00    3.5
Freq: 2H, dtype: float64


# Upsample the hourly series to 15-minute bins; bins that contain no
# observation come out as NaN when averaged.
temp_series_freq_15min = temp_series.resample("15Min").mean()
temp_series_freq_15min.head(n=10) # `head` shows only the first n values

2016-10-29 17:30:00    4.4
2016-10-29 17:45:00    NaN
2016-10-29 18:00:00    NaN
2016-10-29 18:15:00    NaN
2016-10-29 18:30:00    5.1
2016-10-29 18:45:00    NaN
2016-10-29 19:00:00    NaN
2016-10-29 19:15:00    NaN
2016-10-29 19:30:00    6.1
2016-10-29 19:45:00    NaN
Freq: 15T, dtype: float64


temp_series_freq_15min = temp_series.resample("15Min").interpolate(method="cubic")
temp_series_freq_15min.head(n=10)

2016-10-29 17:30:00    4.400000
2016-10-29 17:45:00    4.452911
2016-10-29 18:00:00    4.605113
2016-10-29 18:15:00    4.829758
2016-10-29 18:30:00    5.100000
2016-10-29 18:45:00    5.388992
2016-10-29 19:00:00    5.669887
2016-10-29 19:15:00    5.915839
2016-10-29 19:30:00    6.100000
2016-10-29 19:45:00    6.203621
Freq: 15T, dtype: float64


temp_series.plot(label="Period: 1 hour")
temp_series_freq_15min.plot(label="Period: 15 minutes")
plt.legend()
plt.show()


temp_series_ny = temp_series.tz_localize("America/New_York")
temp_series_ny

2016-10-29 17:30:00-04:00    4.4
2016-10-29 18:30:00-04:00    5.1
2016-10-29 19:30:00-04:00    6.1
2016-10-29 20:30:00-04:00    6.2
2016-10-29 21:30:00-04:00    6.1
2016-10-29 22:30:00-04:00    6.1
2016-10-29 23:30:00-04:00    5.7
2016-10-30 00:30:00-04:00    5.2
2016-10-30 01:30:00-04:00    4.7
2016-10-30 02:30:00-04:00    4.1
2016-10-30 03:30:00-04:00    3.9
2016-10-30 04:30:00-04:00    3.5
dtype: float64


temp_series_paris = temp_series_ny.tz_convert("Europe/Paris")
temp_series_paris

2016-10-29 23:30:00+02:00    4.4
2016-10-30 00:30:00+02:00    5.1
2016-10-30 01:30:00+02:00    6.1
2016-10-30 02:30:00+02:00    6.2
2016-10-30 02:30:00+01:00    6.1
2016-10-30 03:30:00+01:00    6.1
2016-10-30 04:30:00+01:00    5.7
2016-10-30 05:30:00+01:00    5.2
2016-10-30 06:30:00+01:00    4.7
2016-10-30 07:30:00+01:00    4.1
2016-10-30 08:30:00+01:00    3.9
2016-10-30 09:30:00+01:00    3.5
dtype: float64


temp_series_paris_naive = temp_series_paris.tz_localize(None)
temp_series_paris_naive

2016-10-29 23:30:00    4.4
2016-10-30 00:30:00    5.1
2016-10-30 01:30:00    6.1
2016-10-30 02:30:00    6.2
2016-10-30 02:30:00    6.1
2016-10-30 03:30:00    6.1
2016-10-30 04:30:00    5.7
2016-10-30 05:30:00    5.2
2016-10-30 06:30:00    4.7
2016-10-30 07:30:00    4.1
2016-10-30 08:30:00    3.9
2016-10-30 09:30:00    3.5
dtype: float64


try:
    temp_series_paris_naive.tz_localize("Europe/Paris")
except Exception as e:
    print(type(e))
    print(e)

<class 'pytz.exceptions.AmbiguousTimeError'>
Cannot infer dst time from 2016-10-30 02:30:00, try using the 'ambiguous' argument


temp_series_paris_naive.tz_localize("Europe/Paris", ambiguous="infer")

2016-10-29 23:30:00+02:00    4.4
2016-10-30 00:30:00+02:00    5.1
2016-10-30 01:30:00+02:00    6.1
2016-10-30 02:30:00+02:00    6.2
2016-10-30 02:30:00+01:00    6.1
2016-10-30 03:30:00+01:00    6.1
2016-10-30 04:30:00+01:00    5.7
2016-10-30 05:30:00+01:00    5.2
2016-10-30 06:30:00+01:00    4.7
2016-10-30 07:30:00+01:00    4.1
2016-10-30 08:30:00+01:00    3.9
2016-10-30 09:30:00+01:00    3.5
dtype: float64


quarters = pd.period_range('2016Q1', periods=8, freq='Q')
quarters

PeriodIndex(['2016Q1', '2016Q2', '2016Q3', '2016Q4', '2017Q1', '2017Q2',
             '2017Q3', '2017Q4'],
            dtype='period[Q-DEC]', freq='Q-DEC')


quarters + 3

PeriodIndex(['2016Q4', '2017Q1', '2017Q2', '2017Q3', '2017Q4', '2018Q1',
             '2018Q2', '2018Q3'],
            dtype='period[Q-DEC]', freq='Q-DEC')


quarters.asfreq("M")

PeriodIndex(['2016-03', '2016-06', '2016-09', '2016-12', '2017-03', '2017-06',
             '2017-09', '2017-12'],
            dtype='period[M]', freq='M')


quarters.asfreq("M", how="start")

PeriodIndex(['2016-01', '2016-04', '2016-07', '2016-10', '2017-01', '2017-04',
             '2017-07', '2017-10'],
            dtype='period[M]', freq='M')


quarters.asfreq("A")

PeriodIndex(['2016', '2016', '2016', '2016', '2017', '2017', '2017', '2017'], dtype='period[A-DEC]', freq='A-DEC')


quarterly_revenue = pd.Series([300, 320, 290, 390, 320, 360, 310, 410], index = quarters)
quarterly_revenue

2016Q1    300
2016Q2    320
2016Q3    290
2016Q4    390
2017Q1    320
2017Q2    360
2017Q3    310
2017Q4    410
Freq: Q-DEC, dtype: int64


quarterly_revenue.plot(kind="line")
plt.show()


last_hours = quarterly_revenue.to_timestamp(how="end", freq="H")
last_hours

2016-03-31 23:59:59.999999999    300
2016-06-30 23:59:59.999999999    320
2016-09-30 23:59:59.999999999    290
2016-12-31 23:59:59.999999999    390
2017-03-31 23:59:59.999999999    320
2017-06-30 23:59:59.999999999    360
2017-09-30 23:59:59.999999999    310
2017-12-31 23:59:59.999999999    410
dtype: int64


last_hours.to_period()

2016Q1    300
2016Q2    320
2016Q3    290
2016Q4    390
2017Q1    320
2017Q2    360
2017Q3    310
2017Q4    410
Freq: Q-DEC, dtype: int64


# Goal: 09:00 on the last business day of every month of 2016.
months_2016 = pd.period_range("2016", periods=12, freq="M")
# asfreq("D") resolves each month to its last day; adding 1 moves on to the
# first day of the following month.
one_day_after_last_days = months_2016.asfreq("D") + 1
# Stepping back one business day from that timestamp lands on the last
# business day of the original month.
last_bdays = one_day_after_last_days.to_timestamp() - pd.tseries.offsets.BDay()
# Convert to hourly periods (midnight) and shift by 9 periods to reach 09:00.
last_bdays.to_period("H") + 9

PeriodIndex(['2016-01-29 09:00', '2016-02-29 09:00', '2016-03-31 09:00',
             '2016-04-29 09:00', '2016-05-31 09:00', '2016-06-30 09:00',
             '2016-07-29 09:00', '2016-08-31 09:00', '2016-09-30 09:00',
             '2016-10-31 09:00', '2016-11-30 09:00', '2016-12-30 09:00'],
            dtype='period[H]', freq='H')


# Build a DataFrame from a dict of Series: each dict key becomes a column and
# the rows are matched on the Series' index labels, not on position.
people_dict = {
    "weight": pd.Series([68, 83, 112], index=["alice", "bob", "charles"]),
    # Index order differs from "weight" on purpose; the Series name "year" is
    # not used as the column name (the dict key "birthyear" is).
    "birthyear": pd.Series([1984, 1985, 1992], index=["bob", "alice", "charles"], name="year"),
    # Shorter Series: labels that are absent here presumably end up as NaN.
    "children": pd.Series([0, 3], index=["charles", "bob"]),
    "hobby": pd.Series(["Biking", "Dancing"], index=["alice", "bob"]),
}
people = pd.DataFrame(people_dict)
people


people["birthyear"]

alice      1985
bob        1984
charles    1992
Name: birthyear, dtype: int64


people[["birthyear", "hobby"]]


d2 = pd.DataFrame(
        people_dict,
        columns=["birthyear", "weight", "height"],
        index=["bob", "alice", "eugene"]
     )
d2


# Row-wise construction: one inner list per person, in the column order given
# below; np.nan marks the values that are not known.
values = [
            [1985, np.nan, "Biking",   68],
            [1984, 3,      "Dancing",  83],
            [1992, 0,      np.nan,    112]
         ]
d3 = pd.DataFrame(
        values,
        columns=["birthyear", "children", "hobby", "weight"],
        index=["alice", "bob", "charles"]
     )
d3


# Same table, with the missing entries expressed through a NumPy masked array
# instead of np.nan. The builtin `object` is used as the dtype because the
# `np.object` alias has been removed from NumPy and raises AttributeError.
masked_array = np.ma.asarray(values, dtype=object)
# The index pairs rows (0, 2) with columns (1, 2): this masks alice/children
# and charles/hobby.
masked_array[(0, 2), (1, 2)] = np.ma.masked
d3 = pd.DataFrame(
        masked_array,
        columns=["birthyear", "children", "hobby", "weight"],
        index=["alice", "bob", "charles"]
     )
d3


d4 = pd.DataFrame(
         d3,
         columns=["hobby", "children"],
         index=["alice", "bob"]
     )
d4


people = pd.DataFrame({
    "birthyear": {"alice":1985, "bob": 1984, "charles": 1992},
    "hobby": {"alice":"Biking", "bob": "Dancing"},
    "weight": {"alice":68, "bob": 83, "charles": 112},
    "children": {"bob": 3, "charles": 0}
})
people


# Nested dicts keyed by tuples: the outer (group, column) tuples become a
# two-level column index and the inner (city, name) tuples a two-level row
# index.
d5 = pd.DataFrame(
  {
    ("public", "birthyear"):
        {("Paris","alice"):1985, ("Paris","bob"): 1984, ("London","charles"): 1992},
    # No entry for ("London", "charles") here, so that cell is missing.
    ("public", "hobby"):
        {("Paris","alice"):"Biking", ("Paris","bob"): "Dancing"},
    ("private", "weight"):
        {("Paris","alice"):68, ("Paris","bob"): 83, ("London","charles"): 112},
    ("private", "children"):
        {("Paris", "alice"):np.nan, ("Paris","bob"): 3, ("London","charles"): 0}
  }
)
d5


d5["public"]


d5["public", "hobby"]  # Same as d5["public"]["hobby"].

Paris   alice       Biking
        bob        Dancing
London  charles        NaN
Name: (public, hobby), dtype: object

d5


d5.columns = d5.columns.droplevel(level = 0)
d5


d6 = d5.T
d6


d7 = d6.stack()
d7


d8 = d7.unstack()
d8


d9 = d8.unstack()
d9

London  alice    birthyear        NaN
                 children         NaN
                 hobby            NaN
                 weight           NaN
        bob      birthyear        NaN
                 children         NaN
                 hobby            NaN
                 weight           NaN
        charles  birthyear       1992
                 children           0
                 hobby            NaN
                 weight           112
Paris   alice    birthyear       1985
                 children         NaN
                 hobby         Biking
                 weight            68
        bob      birthyear       1984
                 children           3
                 hobby        Dancing
                 weight            83
        charles  birthyear        NaN
                 children         NaN
                 hobby            NaN
                 weight           NaN
dtype: object


d10 = d9.unstack(level = (0,1))
d10


people


people.loc["charles"]

birthyear    1992
hobby         NaN
weight        112
children        0
Name: charles, dtype: object


people.iloc[2]

birthyear    1992
hobby         NaN
weight        112
children        0
Name: charles, dtype: object


people.iloc[1:3]


people[np.array([True, False, True])]


people[people["birthyear"] < 1990]


people


people["age"] = 2018 - people["birthyear"]  # adds the "age" column
people["over 30"] = people["age"] > 30      # adds the "over 30" column
# pop() removes the column from the frame and hands it back as a Series.
birthyears = people.pop("birthyear")
# del drops the column without returning it.
del people["children"]

people


birthyears

alice      1985
bob        1984
charles    1992
Name: birthyear, dtype: int64


# The new column is aligned on the frame's existing row labels.
people["pets"] = pd.Series({"bob": 0, "charles": 5, "eugene":1})  # alice is missing, eugene is ignored
people


people.insert(1, "height", [172, 181, 185])
people


people.assign(
    body_mass_index = people["weight"] / (people["height"] / 100) ** 2,
    has_pets = people["pets"] > 0
)


# Both keyword expressions are evaluated against the current `people` before
# assign() is even called, so people["body_mass_index"] does not exist yet
# and the lookup raises KeyError.
try:
    people.assign(
        body_mass_index = people["weight"] / (people["height"] / 100) ** 2,
        overweight = people["body_mass_index"] > 25
    )
except KeyError as e:
    print("키 에러:", e)

키 에러: 'body_mass_index'


d6 = people.assign(body_mass_index = people["weight"] / (people["height"] / 100) ** 2)
d6.assign(overweight = d6["body_mass_index"] > 25)


# Chaining two assign() calls does not help: the second expression still
# indexes the original `people`, not the intermediate frame returned by the
# first assign(), so "body_mass_index" is still missing.
try:
    (people
         .assign(body_mass_index = people["weight"] / (people["height"] / 100) ** 2)
         .assign(overweight = people["body_mass_index"] > 25)
    )
except KeyError as e:
    print("키 에러:", e)

키 에러: 'body_mass_index'


# Passing callables defers the computation: each lambda receives the frame
# that assign() is operating on, so the second call can see the
# "body_mass_index" column created by the first.
(people
     .assign(body_mass_index = lambda df: df["weight"] / (df["height"] / 100) ** 2)
     .assign(overweight = lambda df: df["body_mass_index"] > 25)
)


people.eval("weight / (height/100) ** 2 > 25")

alice      False
bob         True
charles     True
dtype: bool


people.eval("body_mass_index = weight / (height/100) ** 2", inplace=True)
people


overweight_threshold = 30
people.eval("overweight = body_mass_index > @overweight_threshold", inplace=True)
people


people.query("age > 30 and pets == 0")


people.sort_index(ascending=False)


people.sort_index(axis=1, inplace=True)
people


people.sort_values(by="age", inplace=True)
people


people.plot(kind = "line", x = "body_mass_index", y = ["height", "weight"])
plt.show()


people.plot(kind = "scatter", x = "height", y = "weight", s=[40, 120, 200])
plt.show()


grades_array = np.array([[8,8,9],[10,9,9],[4, 8, 2], [9, 10, 10]])
grades = pd.DataFrame(grades_array, columns=["sep", "oct", "nov"], index=["alice","bob","charles","darwin"])
grades


np.sqrt(grades)


grades + 1


grades >= 5


grades.mean()

sep    7.75
oct    8.75
nov    7.50
dtype: float64


(grades > 5).all()

sep    False
oct     True
nov    False
dtype: bool


(grades > 5).all(axis = 1)

alice       True
bob         True
charles    False
darwin      True
dtype: bool


(grades == 10).any(axis = 1)

alice      False
bob         True
charles    False
darwin      True
dtype: bool


grades - grades.mean()  # same as grades - [7.75, 8.75, 7.50]


pd.DataFrame([[7.75, 8.75, 7.50]]*4, index=grades.index, columns=grades.columns)


grades - grades.values.mean() # subtracts the overall mean (8.00) from every grade


bonus_array = np.array([[0,np.nan,2],[np.nan,1,0],[0, 1, 0], [3, 3, 0]])
bonus_points = pd.DataFrame(bonus_array, columns=["oct", "nov", "dec"], index=["bob","colin", "darwin", "charles"])
bonus_points


grades + bonus_points


(grades + bonus_points).fillna(0)


fixed_bonus_points = bonus_points.fillna(0)
fixed_bonus_points.insert(0, "sep", 0)
fixed_bonus_points.loc["alice"] = 0
grades + fixed_bonus_points


bonus_points


bonus_points.interpolate(axis=1)


# Work on a copy so bonus_points itself is left untouched.
better_bonus_points = bonus_points.copy()
# Add a "sep" column of zeros in first position and an "alice" row of zeros.
better_bonus_points.insert(0, "sep", 0)
better_bonus_points.loc["alice"] = 0
# Fill the remaining gaps by interpolating across the columns of each row.
better_bonus_points = better_bonus_points.interpolate(axis=1)
better_bonus_points


grades + better_bonus_points


grades["dec"] = np.nan
final_grades = grades + better_bonus_points
final_grades


final_grades_clean = final_grades.dropna(how="all")
final_grades_clean


final_grades_clean = final_grades_clean.dropna(axis=1, how="all")
final_grades_clean


final_grades["hobby"] = ["Biking", "Dancing", np.nan, "Dancing", "Biking"]
final_grades


grouped_grades = final_grades.groupby("hobby")
grouped_grades

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fccd01f18d0>


grouped_grades.mean()


bonus_points


more_grades = final_grades_clean.stack().reset_index()
more_grades.columns = ["name", "month", "grade"]
more_grades["bonus"] = [np.nan, np.nan, np.nan, 0, np.nan, 2, 3, 3, 0, 0, 1, 0]
more_grades


# Mean per student. The numeric columns are listed explicitly: with the
# default (every remaining column) the text column "month" is also handed to
# the mean aggregation, which recent pandas versions reject with a TypeError
# instead of silently dropping it.
pd.pivot_table(more_grades, index="name", values=["bonus", "grade"])


# Maximum per student. The aggregation is named by string ("max") rather than
# passed as np.max: recent pandas versions emit a FutureWarning when a NumPy
# reduction callable is supplied here.
pd.pivot_table(more_grades, index="name", values=["grade","bonus"], aggfunc="max")


pd.pivot_table(more_grades, index="name", values="grade", columns="month", margins=True)


pd.pivot_table(more_grades, index=("name", "month"), margins=True)


def _cell_value(row, col):
    """Deterministic filler value: (row + col squared) mod 17, times 11."""
    return (row + col * col) % 17 * 11


# 10,000 x 26 grid of generated values, one column per capital letter.
much_data = np.fromfunction(_cell_value, (10000, 26))
large_df = pd.DataFrame(data=much_data, columns=list("ABCDEFGHIJKLMNOPQRSTUVWXYZ"))
# Blank out every entry that is a multiple of 16 so the frame has gaps.
large_df[large_df % 16 == 0] = np.nan
# Add a constant text column in fourth position.
large_df.insert(loc=3, column="some_text", value="Blabla")
large_df


large_df.head()


large_df.tail(n=2)


large_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 27 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   A          8823 non-null   float64
 1   B          8824 non-null   float64
 2   C          8824 non-null   float64
 3   some_text  10000 non-null  object 
 4   D          8824 non-null   float64
 5   E          8822 non-null   float64
 6   F          8824 non-null   float64
 7   G          8824 non-null   float64
 8   H          8822 non-null   float64
 9   I          8823 non-null   float64
 10  J          8823 non-null   float64
 11  K          8822 non-null   float64
 12  L          8824 non-null   float64
 13  M          8824 non-null   float64
 14  N          8822 non-null   float64
 15  O          8824 non-null   float64
 16  P          8824 non-null   float64
 17  Q          8824 non-null   float64
 18  R          8823 non-null   float64
 19  S          8824 non-null   float64
 20  T          8824 non-null   float64
 21  U          8824 non-null   float64
 22  V          8822 non-null   float64
 23  W          8824 non-null   float64
 24  X          8824 non-null   float64
 25  Y          8822 non-null   float64
 26  Z          8823 non-null   float64
dtypes: float64(26), object(1)
memory usage: 2.1+ MB


large_df.describe()


# Small frame used to demonstrate the export formats.
my_df = pd.DataFrame(
    [["Biking", 68.5, 1985, np.nan], ["Dancing", 83.1, 1984, 3]],
    columns=["hobby","weight","birthyear","children"],
    index=["alice", "bob"]
)
my_df


# Export the same frame as CSV, HTML and JSON into the working directory.
my_df.to_csv("my_df.csv")
my_df.to_html("my_df.html")
my_df.to_json("my_df.json")


# Echo each exported file. The encoding is spelled out so that reading does
# not depend on the platform's locale default (pandas writes UTF-8 unless told
# otherwise).
for filename in ("my_df.csv", "my_df.html", "my_df.json"):
    print("#", filename)
    with open(filename, "rt", encoding="utf-8") as f:
        print(f.read())
        print()

# my_df.csv
,hobby,weight,birthyear,children
alice,Biking,68.5,1985,
bob,Dancing,83.1,1984,3.0


# my_df.html
<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>hobby</th>
      <th>weight</th>
      <th>birthyear</th>
      <th>children</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>alice</th>
      <td>Biking</td>
      <td>68.5</td>
      <td>1985</td>
      <td>NaN</td>
    </tr>
    <tr>
      <th>bob</th>
      <td>Dancing</td>
      <td>83.1</td>
      <td>1984</td>
      <td>3.0</td>
    </tr>
  </tbody>
</table>

# my_df.json
{"hobby":{"alice":"Biking","bob":"Dancing"},"weight":{"alice":68.5,"bob":83.1},"birthyear":{"alice":1985,"bob":1984},"children":{"alice":null,"bob":3.0}}


try:
    my_df.to_excel("my_df.xlsx", sheet_name='People')
except ImportError as e:
    print(e)

No module named 'openpyxl'


my_df_loaded = pd.read_csv("my_df.csv", index_col=0)
my_df_loaded


# Try to load a CSV straight from a URL. us_cities is pre-set to None so the
# final expression is still defined when the download fails.
us_cities = None
try:
    csv_url = "http://simplemaps.com/files/cities.csv"
    us_cities = pd.read_csv(csv_url, index_col=0)
    # Keep only the first rows for display.
    us_cities = us_cities.head()
except IOError as e:
    # Download problems (e.g. an HTTP error) are printed rather than raised.
    print(e)
us_cities

HTTP Error 403: Forbidden


# City coordinates, spelled out column by column; rows get the default
# 0..4 integer index.
city_loc = pd.DataFrame(
    {
        "state": ["CA", "NY", "FL", "OH", "UT"],
        "city": ["San Francisco", "New York", "Miami", "Cleveland", "Salt Lake City"],
        "lat": [37.781334, 40.705649, 25.791100, 41.473508, 40.755851],
        "lng": [-122.416728, -74.008344, -80.320733, -81.739791, -111.896657],
    }
)
city_loc


# City populations, spelled out column by column; the row labels 3..6 are
# set explicitly.
city_pop = pd.DataFrame(
    {
        "population": [808976, 8363710, 413201, 2242193],
        "city": ["San Francisco", "New York", "Miami", "Houston"],
        "state": ["California", "New-York", "Florida", "Texas"],
    },
    index=[3, 4, 5, 6],
)
city_pop


pd.merge(left=city_loc, right=city_pop, on="city")


all_cities = pd.merge(left=city_loc, right=city_pop, on="city", how="outer")
all_cities


pd.merge(left=city_loc, right=city_pop, on="city", how="right")


city_pop2 = city_pop.copy()
city_pop2.columns = ["population", "name", "state"]
pd.merge(left=city_loc, right=city_pop2, left_on="city", right_on="name")


result_concat = pd.concat([city_loc, city_pop])
result_concat


result_concat.loc[3]


pd.concat([city_loc, city_pop], ignore_index=True)


pd.concat([city_loc, city_pop], join="inner")


pd.concat([city_loc, city_pop], axis=1)


pd.concat([city_loc.set_index("city"), city_pop.set_index("city")], axis=1)


# Stack city_pop's rows under city_loc's. DataFrame.append() has been removed
# from pandas; pd.concat() is the supported way to do this.
pd.concat([city_loc, city_pop])


city_eco = city_pop.copy()
city_eco["eco_code"] = [17, 17, 34, 20]
city_eco


city_eco["economy"] = city_eco["eco_code"].astype('category')
city_eco["economy"].cat.categories

Int64Index([17, 20, 34], dtype='int64')


# Give the numeric category codes readable names; the new names are matched
# to the existing categories in their order (17, 20, 34). rename_categories()
# returns a new Series, so the result is assigned back - assigning directly to
# `.cat.categories` is no longer supported by current pandas.
city_eco["economy"] = city_eco["economy"].cat.rename_categories(["Finance", "Energy", "Tourism"])
city_eco


city_eco.sort_values(by="economy", ascending=False)

	sep	oct	nov
alice	2.828427	2.828427	3.000000
bob	3.162278	3.000000	3.000000
charles	2.000000	2.828427	1.414214
darwin	3.000000	3.162278	3.162278

	sep	oct	nov
alice	0.25	-0.75	1.5
bob	2.25	0.25	1.5
charles	-3.75	-0.75	-5.5
darwin	1.25	1.25	2.5

		public		private
		birthyear	hobby	weight	children
Paris	alice	1985	Biking	68	NaN
Paris	bob	1984	Dancing	83	3.0
London	charles	1992	NaN	112	0.0

	hobby	weight	age	over 30	pets
alice	Biking	68	33	True	NaN
bob	Dancing	83	34	True	0.0
charles	NaN	112	26	False	5.0

	hobby	height	weight	age	over 30	pets	body_mass_index	overweight
charles	NaN	185	112	26	False	5.0	32.724617	True
bob	Dancing	181	83	34	True	0.0	25.335002	False
alice	Biking	172	68	33	True	NaN	22.985398	False

	sep	oct	nov
alice	7.75	8.75	7.5
bob	7.75	8.75	7.5
charles	7.75	8.75	7.5
darwin	7.75	8.75	7.5

	dec	nov	oct	sep
alice	NaN	NaN	NaN	NaN
bob	NaN	NaN	9.0	NaN
charles	NaN	5.0	11.0	NaN
colin	NaN	NaN	NaN	NaN
darwin	NaN	11.0	10.0	NaN

	bonus	grade
name
alice	NaN	8.333333
bob	1.000000	9.666667
charles	2.000000	6.666667
darwin	0.333333	10.000000

month	nov	oct	sep	All
name
alice	9.00	8.0	8.00	8.333333
bob	10.00	9.0	10.00	9.666667
charles	5.00	11.0	4.00	6.666667
darwin	11.00	10.0	9.00	10.000000
All	8.75	9.5	7.75	8.666667

		bonus	grade
name	month
alice	nov	NaN	9.00
	oct	NaN	8.00
	sep	NaN	8.00
bob	nov	2.000	10.00
	oct	NaN	9.00
	sep	0.000	10.00
charles	nov	0.000	5.00
	oct	3.000	11.00
	sep	3.000	4.00
darwin	nov	0.000	11.00
	oct	1.000	10.00
	sep	0.000	9.00
All		1.125	8.75

	A	B	C	some_text	D	E	F	G	H	I	...	Q	R	S	T	U	V	W	X	Y	Z
0	NaN	11.0	44.0	Blabla	99.0	NaN	88.0	22.0	165.0	143.0	...	11.0	NaN	11.0	44.0	99.0	NaN	88.0	22.0	165.0	143.0
1	11.0	22.0	55.0	Blabla	110.0	NaN	99.0	33.0	NaN	154.0	...	22.0	11.0	22.0	55.0	110.0	NaN	99.0	33.0	NaN	154.0
2	22.0	33.0	66.0	Blabla	121.0	11.0	110.0	44.0	NaN	165.0	...	33.0	22.0	33.0	66.0	121.0	11.0	110.0	44.0	NaN	165.0
3	33.0	44.0	77.0	Blabla	132.0	22.0	121.0	55.0	11.0	NaN	...	44.0	33.0	44.0	77.0	132.0	22.0	121.0	55.0	11.0	NaN
4	44.0	55.0	88.0	Blabla	143.0	33.0	132.0	66.0	22.0	NaN	...	55.0	44.0	55.0	88.0	143.0	33.0	132.0	66.0	22.0	NaN
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
9995	NaN	NaN	33.0	Blabla	88.0	165.0	77.0	11.0	154.0	132.0	...	NaN	NaN	NaN	33.0	88.0	165.0	77.0	11.0	154.0	132.0
9996	NaN	11.0	44.0	Blabla	99.0	NaN	88.0	22.0	165.0	143.0	...	11.0	NaN	11.0	44.0	99.0	NaN	88.0	22.0	165.0	143.0
9997	11.0	22.0	55.0	Blabla	110.0	NaN	99.0	33.0	NaN	154.0	...	22.0	11.0	22.0	55.0	110.0	NaN	99.0	33.0	NaN	154.0
9998	22.0	33.0	66.0	Blabla	121.0	11.0	110.0	44.0	NaN	165.0	...	33.0	22.0	33.0	66.0	121.0	11.0	110.0	44.0	NaN	165.0
9999	33.0	44.0	77.0	Blabla	132.0	22.0	121.0	55.0	11.0	NaN	...	44.0	33.0	44.0	77.0	132.0	22.0	121.0	55.0	11.0	NaN

	A	B	C	D	E	F	G	H	I	J	...	Q	R	S	T	U	V	W	X	Y	Z
count	8823.000000	8824.000000	8824.000000	8824.000000	8822.000000	8824.000000	8824.000000	8822.000000	8823.000000	8823.000000	...	8824.000000	8823.000000	8824.000000	8824.000000	8824.000000	8822.000000	8824.000000	8824.000000	8822.000000	8823.000000
mean	87.977559	87.972575	87.987534	88.012466	87.983791	88.007480	87.977561	88.000000	88.022441	88.022441	...	87.972575	87.977559	87.972575	87.987534	88.012466	87.983791	88.007480	87.977561	88.000000	88.022441
std	47.535911	47.535523	47.521679	47.521679	47.535001	47.519371	47.529755	47.536879	47.535911	47.535911	...	47.535523	47.535911	47.535523	47.521679	47.521679	47.535001	47.519371	47.529755	47.536879	47.535911
min	11.000000	11.000000	11.000000	11.000000	11.000000	11.000000	11.000000	11.000000	11.000000	11.000000	...	11.000000	11.000000	11.000000	11.000000	11.000000	11.000000	11.000000	11.000000	11.000000	11.000000
25%	44.000000	44.000000	44.000000	44.000000	44.000000	44.000000	44.000000	44.000000	44.000000	44.000000	...	44.000000	44.000000	44.000000	44.000000	44.000000	44.000000	44.000000	44.000000	44.000000	44.000000
50%	88.000000	88.000000	88.000000	88.000000	88.000000	88.000000	88.000000	88.000000	88.000000	88.000000	...	88.000000	88.000000	88.000000	88.000000	88.000000	88.000000	88.000000	88.000000	88.000000	88.000000
75%	132.000000	132.000000	132.000000	132.000000	132.000000	132.000000	132.000000	132.000000	132.000000	132.000000	...	132.000000	132.000000	132.000000	132.000000	132.000000	132.000000	132.000000	132.000000	132.000000	132.000000
max	165.000000	165.000000	165.000000	165.000000	165.000000	165.000000	165.000000	165.000000	165.000000	165.000000	...	165.000000	165.000000	165.000000	165.000000	165.000000	165.000000	165.000000	165.000000	165.000000	165.000000

	state	city	lat	lng
0	CA	San Francisco	37.781334	-122.416728
1	NY	New York	40.705649	-74.008344
2	FL	Miami	25.791100	-80.320733
3	OH	Cleveland	41.473508	-81.739791
4	UT	Salt Lake City	40.755851	-111.896657

	population	city	state
3	808976	San Francisco	California
4	8363710	New York	New-York
5	413201	Miami	Florida
6	2242193	Houston	Texas

설정¶

Series 객체¶

Series 만들기¶

1D ndarray와 비슷합니다¶

인덱스 레이블¶

dict에서 초기화¶

자동 정렬¶

스칼라로 초기화¶

Series 이름¶

Series 그래프 출력¶

시간 다루기¶

시간 범위¶

리샘플링¶

업샘플링과 보간¶

시간대¶

기간¶

DataFrame 객체¶

DataFrame 만들기¶

멀티 인덱싱¶

레벨 낮추기¶

전치¶

레벨 스택과 언스택¶

대부분의 메서드는 수정된 복사본을 반환합니다¶

행 참조하기¶

열 추가, 삭제¶

새로운 열 할당하기¶

표현식 평가¶

DataFrame 쿼리하기¶

DataFrame 정렬¶

DataFrame 그래프 그리기¶

DataFrame 연산¶

자동 정렬¶

누락된 데이터 다루기¶

groupby로 집계하기¶

피봇 테이블¶

함수¶

저장 & 로딩¶

저장¶

로딩¶

DataFrame 합치기¶

SQL 조인¶

연결¶

범주¶

그 다음엔?¶

`Series` 객체¶

`Series` 만들기¶

1D `ndarray`와 비슷합니다¶

`dict`에서 초기화¶

`Series` 이름¶

`Series` 그래프 출력¶

`DataFrame` 객체¶

`DataFrame` 만들기¶

`DataFrame` 쿼리하기¶

`DataFrame` 정렬¶

`DataFrame` 그래프 그리기¶

`DataFrame` 연산¶

`groupby`로 집계하기¶

`DataFrame` 합치기¶