En estas notas se realizan pruebas con la estructura de datos "Series".
In [1]:
#importacion estandar de pandas
import pandas as pd
# Build a Series from a plain list; no index is given, so the default
# integer index is used.
edad = pd.Series(data=[10, 20, 14, 11])
edad
Out[1]:
In [2]:
# Same values as before, this time labelled with an explicit string index.
bacteria = pd.Series(data=[10, 20, 14, 11], index=list('abcd'))
bacteria
Out[2]:
In [3]:
# A dict can also seed a Series: its keys become the index labels.
bacteria_dict = dict(a=10, b=20, c=14, d=11)
pd.Series(data=bacteria_dict)
Out[3]:
In [4]:
# Data: the underlying values of the Series, without the index
bacteria.values
Out[4]:
In [5]:
# Index: the labels attached to the values
bacteria.index
Out[5]:
In [6]:
# Select a single element by its index label
bacteria['b']
Out[6]:
In [7]:
# Positional access: .iloc[1] is the second element regardless of the labels.
# Plain bacteria[1] depends on an integer key falling back to a position on a
# string-labelled index, which recent pandas versions deprecate.
bacteria.iloc[1]
Out[7]:
In [8]:
# Select several elements at once by passing a list of labels
bacteria[['a','b']]
Out[8]:
In [9]:
# Boolean mask: keep only the values strictly between 10 and 20.
# Each comparison needs its own parentheses because & binds tighter than < and >.
bacteria[(bacteria > 10) & (bacteria < 20)]
Out[9]:
In [10]:
# Display the Series again: the selections above were plain expressions and
# did not assign anything back to bacteria
bacteria
Out[10]:
In [11]:
# Adding a scalar is applied to every element; the result is rebound to bacteria.
bacteria = bacteria + 5
# print() with a single argument works on both Python 2 and Python 3.
print(bacteria)
In [12]:
# Adding a Series to itself pairs elements by index label, doubling each value.
bacteria = bacteria + bacteria
# print() with a single argument works on both Python 2 and Python 3.
print(bacteria)
In [13]:
# The comparison is element-wise; note that bacteria is rebound to the
# resulting boolean Series from here on.
bacteria = bacteria > 30
# print() with a single argument works on both Python 2 and Python 3.
print(bacteria)
In [14]:
# any() returns True if at least one element is True
print(bacteria.any())
# all() returns True only if every element is True
print(bacteria.all())
In [15]:
# Rebuild the integer Series with its string labels (bacteria was rebound to a
# boolean Series by `bacteria = bacteria > 30` earlier)
bacteria = pd.Series([10, 20, 14, 11], index=['a', 'b', 'c', 'd'])
def f(x):
    """Return ``x + 100`` when ``x`` is odd, otherwise return ``x`` unchanged.

    Parity is decided with ``x % 2 != 0``, so any value supporting ``%`` and
    ``+`` is accepted (e.g. Python ints or NumPy integer scalars).
    """
    if x % 2 != 0:
        return x + 100
    return x
# Call f on every element; the result is a new Series that is only displayed
# here, not assigned
bacteria.apply(f)
Out[15]:
In [16]:
%%timeit
# Showing performance: update the Series one element at a time in an explicit
# Python loop. The loop is intentionally kept as the slow baseline.
ds = pd.Series(range(10000))
for counter in range(len(ds)):
    ds[counter] = f(ds[counter])
In [17]:
%%timeit
# Build the same 10000-element Series and transform it with Series.apply
# rather than an explicit Python loop.
ds = pd.Series(range(10000)).apply(f)
In [18]:
# A fresh Series with the default integer index, used below to show what
# plain assignment does.
x = pd.Series(data=[10, 20, 14, 11])
x
Out[18]:
In [19]:
# Plain assignment does not copy: y is a second name for the same Series
# object as x
y = x
y
Out[19]:
In [20]:
# Read the element whose index label is 0
y[0]
Out[20]:
In [21]:
# Writes go to the object that y and x both refer to (y = x above)
y[0] = 100
# another way to change a value
y.loc[1] = 400
In [22]:
# Display y after the two assignments
y
Out[22]:
In [23]:
# x shows the same modified values, because x and y name the same object
x
Out[23]:
In [24]:
# Summary statistics, reporting the 25th, 50th and 75th percentiles
bacteria.describe(percentiles=[0.25, 0.5, 0.75])
Out[24]:
In [25]:
import numpy as np
# Cast the values to float64; the result is only displayed, so bacteria itself
# keeps its current dtype
bacteria.astype(np.float64)
Out[25]:
In [26]:
# A missing entry written as the None object
pd.Series([632, 569, None], index=['a', 'c', 'e'])
Out[26]:
In [27]:
# The same missing entry written explicitly as np.nan
pd.Series([632, 569, np.nan], index=['a', 'c', 'e'])
Out[27]:
In [28]:
# Here 'None' is an ordinary string, not the None object
bacteria_dict = {'a': 632, 'b': 1638, 'c': 569, 'd': 'None'}
# Only the labels listed in index are requested: 'b' and 'd' are left out,
# and 'e' has no entry in the dict
pd.Series(bacteria_dict, index=['a','c','e'])
Out[28]:
In [29]:
# Here 'd' maps to the actual None object
bacteria_dict = {'a': 632, 'b': 1638, 'c': 569, 'd': None}
# Only the labels listed in index are requested: 'b' and 'd' are left out,
# and 'e' has no entry in the dict
pd.Series(bacteria_dict, index=['a','c','e'])
Out[29]:
In [30]:
# Here 'NA' is an ordinary string, not a missing-value marker
bacteria_dict = {'a': 632, 'b': 1638, 'c': 569, 'd': 'NA'}
# Only the labels listed in index are requested: 'b' and 'd' are left out,
# and 'e' has no entry in the dict
pd.Series(bacteria_dict, index=['a','c','e'])
Out[30]:
In [31]:
# 'NaN' under key 'd' is an ordinary string. The Series is built only from the
# labels 'a', 'c' and 'e'; 'e' has no entry in the dict.
bacteria_dict = {
    'a': 632,
    'b': 1638,
    'c': 569,
    'd': 'NaN',
}
bacteria_nueva = pd.Series(data=bacteria_dict, index=list('ace'))
bacteria_nueva
Out[31]:
In [32]:
# Boolean Series: True where a value is present, False where it is missing
bacteria_nueva.notnull()
Out[32]:
In [33]:
# True only if no value in the Series is missing
bacteria_nueva.notnull().all()
Out[33]:
In [34]:
# Replace missing values with the mean of the Series; the result is only
# displayed here. Use "inplace=True" to modify the Series itself.
bacteria_nueva.fillna(bacteria_nueva.mean())
Out[34]:
In [35]:
# Drop the missing entries; the result is only displayed, not assigned
bacteria_nueva.dropna()
Out[35]: