Test2 #4

Open · wants to merge 3 commits into master
58 changes: 58 additions & 0 deletions 1_analysis.py
@@ -0,0 +1,58 @@
import pandas as pd

#load csv in as a data frame###############################################################################
df = pd.read_csv('pokemon_data.csv')
print(df.head(3))
print(df.tail(5))


# #load excel in as a data frame
# df_xlsx = pd.read_excel('pokemon_data.xlsx')
# print("EXCEL________________________________________________________________________")
# print(df_xlsx.head(3))


# #load in a tab-separated file as a data frame
# df_tab = pd.read_csv('pokemon_data.txt', delimiter='\t')
# print("TAB SEPERATED________________________________________________________________")
# print(df_tab.head(5))

##########################################################################################################
#read the headers
print(df.columns)

#read a specific column
print(df['Name'][0:5])

#read multiple columns
print(df[['Name', 'HP', 'Type 1']][5:10])

#read a specific row(s)
print(df.iloc[1:3])

#read a specific location (row, column)
#third row, second column (iloc is zero-indexed)
print(df.iloc[2,1])

####Iteration VERY USEFUL########################################################################################
#iterate through each row
for index, row in df.iterrows():
    print(index, row['Name'])
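
# A lighter-weight sketch of the same loop (an alternative, not in the original):
# itertuples() is generally faster than iterrows(), and each CSV column becomes
# an attribute on the row tuple.
# for row in df.itertuples():
#     print(row.Index, row.Name)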

######df.loc----useful for label/value-based searching (as opposed to iloc's integer positions)############################
print(df.loc[df['Type 1'] == "Fire"])

############sorting#####################################################################################
#pass in the column you want to sort by
print(df.sort_values('Name'))

#sort in reverse (descending) order
print(df.sort_values('Name', ascending=False))

#sort by a main and a secondary column
#here the first sorts ascending and the second descending
print(df.sort_values(['Type 1', 'HP'], ascending=[True, False]))



42 changes: 42 additions & 0 deletions 2_make_changes.py
@@ -0,0 +1,42 @@
import pandas as pd

df = pd.read_csv('pokemon_data.csv')

#IN THESE EXAMPLES WE WILL ESSENTIALLY REARRANGE DATA. WE ARE GOING TO USE A LOT OF INDEXING
#BE CAREFUL---IF YOUR DATA CHANGES, HARDCODED INDEXING BREAKS-----USE COLUMN NAMES INSTEAD, LIKE IN THE FIRST EXAMPLE


#create a new column that totals all the stats
#this way is easiest to read-----and SAFEST
df['Total'] = df['HP'] + df['Attack'] + df['Defense'] + df['Sp. Atk'] + df['Sp. Def'] + df['Speed']
print(df.head(5))

#drop the column we made:
df = df.drop(columns=['Total'])
print(df.head(5))

# add the column back in a more succinct way using iloc (integer location)
# : means all rows
# 4:10 selects the columns at integer positions 4 through 9 (HP through Speed)
# axis=1 sums across each row; axis=0 would sum down each column
df['Total'] = df.iloc[:, 4:10].sum(axis=1)
print(df.head(5))
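
# A name-based sketch of the same total (an alternative to the positional slice
# above; assumes the stat columns keep these exact names). It keeps working even
# if the column order changes:
# stat_cols = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']
# df['Total'] = df[stat_cols].sum(axis=1)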


# let's rearrange the columns to move Total over to the left of HP
#first get the columns as a list
#then concatenate the slices---note cols[-1] is a single string, so wrap it as [cols[-1]] to make it a list
cols = list(df.columns.values)
df = df[cols[0:4] + [cols[-1]] + cols[4:12]]
print(df.head(5))
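
# A name-based sketch of the same rearrangement (an alternative, not in the
# original): remove 'Total' from the list and re-insert it just before 'HP',
# so nothing depends on hardcoded slice positions.
# cols = list(df.columns)
# cols.remove('Total')
# cols.insert(cols.index('HP'), 'Total')
# df = df[cols]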


#########################LET'S SAVE OUR DATA################################
#by setting index=False, we drop the leftmost index column
df.to_csv('modified_pokemon.csv', index=False)

#to save as excel
df.to_excel('modified_poke.xlsx', index=False)

#to save as a tab-separated file
df.to_csv('modified_poke2.txt', index=False, sep='\t')
74 changes: 74 additions & 0 deletions 3_filerting_data.py
@@ -0,0 +1,74 @@
import pandas as pd
import re

df = pd.read_csv('pokemon_data.csv')

#filter where a column equals a value
print(df.loc[df['Type 1'] == 'Grass'])

#multiple text conditions
# & means both conditions must hold (and)
# | means at least one condition must hold (or)
print(df.loc[(df['Type 1'] == 'Grass') & (df['Type 2'] == 'Poison')])

#can also filter on numeric values, not just text matches
print(df.loc[(df['Type 1'] == 'Grass') & (df['Type 2'] == 'Poison') & (df['HP'] > 70)])


#can also create new data frames and save them#################################################################
new_df = df.loc[(df['Type 1'] == 'Grass') & (df['Type 2'] == 'Poison') & (df['HP'] > 70)]

# WHEN YOU FILTER DATA IT KEEPS THE OLD INDEX NUMBERS---but you can reset them---
# by default reset_index saves the old index as a new column, but drop=True discards the old indices
print(new_df)
new_df = new_df.reset_index(drop=True)
print(new_df)
#new_df.to_csv('filtered.csv')


# RegEx Filtering###############################################################################
# Example 1: keep only the names that contain 'Mega'
contains_df = df.loc[df['Name'].str.contains('Mega')]
print(contains_df)

# Example 2: keep only the names that do NOT contain 'Mega'
contains_df2 = df.loc[~df['Name'].str.contains('Mega')]
print(contains_df2)

# Example 3: keep rows whose Type 1 is Grass or Fire, using regex
# regex=True means to treat the pattern as a regular expression
# flags=re.I means to ignore case
# 'fire|grass' means fire or grass

regex_df = df.loc[df['Type 1'].str.contains('fire|grass', flags=re.I, regex=True)]
print(regex_df)

#Example 4: all Pokemon names that start with Pi
#'^pi[a-z]*'
# ^ means the start of the string
# [a-z] matches any lowercase letter
# * means zero or more of the preceding characters (i.e. the rest of the name can be any length)
regex_df2 = df.loc[df['Name'].str.contains('^pi[a-z]*', flags=re.I, regex=True)]
print(regex_df2)
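
# A plain-string sketch of the same "starts with Pi" filter (an alternative to
# the regex above): lower-case the names and use str.startswith.
# print(df.loc[df['Name'].str.lower().str.startswith('pi')])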


#####################################CONDITIONAL CHANGES#################################################
#Example 1: under Type 1----change every 'Fire' value to 'Flamer'
df.loc[df['Type 1'] == 'Fire', 'Type 1'] = 'Flamer'
print(df)
#change it back
df.loc[df['Type 1'] == 'Flamer', 'Type 1'] = 'Fire'
print(df)

#Example 2: change all Fire Pokemon to Legendary----use a condition on one column to change another column's value
df.loc[df['Type 1']== 'Fire', 'Legendary'] = True
print(df.loc[df['Type 1'] == "Fire"])


####reload the dataframe to undo marking all the Fire types as Legendary (modified_pokemon.csv also has the Total column used in Example 3)
df = pd.read_csv('modified_pokemon.csv')
print(df)

#Example 3: Change two columns based on a condition by using lists
df.loc[df['Total'] > 500, ['Generation', 'Legendary']] = [88, 'Yeah']
print(df)
3 changes: 3 additions & 0 deletions 4_aggregate_stats.py
@@ -0,0 +1,3 @@
import pandas as pd

df = pd.read_csv('modified_pokemon.csv')
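
# The file stops after loading the data. A minimal sketch of where aggregate
# stats usually go next (the groupby below is an assumption, not part of this
# commit): average stats per Type 1, sorted by Attack.
# print(df.groupby('Type 1').mean(numeric_only=True).sort_values('Attack', ascending=False))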
6 changes: 0 additions & 6 deletions analysis.py

This file was deleted.

6 changes: 6 additions & 0 deletions filtered.csv
@@ -0,0 +1,6 @@
,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
1,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
2,45,Vileplume,Grass,Poison,75,80,85,110,90,50,1,False
3,71,Victreebel,Grass,Poison,80,105,65,100,70,70,1,False
4,591,Amoonguss,Grass,Poison,114,85,70,85,80,30,5,False
Binary file added modified_poke.xlsx
Binary file not shown.