Test2 #4

Open · wants to merge 3 commits into master
58 changes: 58 additions & 0 deletions 1_analysis.py
@@ -0,0 +1,58 @@
import pandas as pd

#load csv in as a data frame###############################################################################
df = pd.read_csv('pokemon_data.csv')
print(df.head(3))
print(df.tail(5))


# #load excel in as a data frame
# df_xlsx = pd.read_excel('pokemon_data.xlsx')
# print("EXCEL________________________________________________________________________")
# print(df_xlsx.head(3))


# #load in a tab-separated file as a data frame
# df_tab = pd.read_csv('pokemon_data.txt', delimiter='\t')
# print("TAB SEPERATED________________________________________________________________")
# print(df_tab.head(5))

##########################################################################################################
#read the headers
print(df.columns)

#read a specific column
print(df['Name'][0:5])

#read multiple columns
print(df[['Name', 'HP', 'Type 1']][5:10])

#read a specific row(s)
print(df.iloc[1:3])

#read a specific location (row, column)
#third row, second column (iloc is zero-indexed)
print(df.iloc[2,1])

####Iteration VERY USEFUL########################################################################################
#iterate through each row
for index, row in df.iterrows():
    print(index, row['Name'])
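
# A lighter-weight sketch of the same loop (an alternative, not in the original):
# itertuples() is generally faster than iterrows(), and each CSV column becomes
# an attribute on the row tuple.
# for row in df.itertuples():
#     print(row.Index, row.Name)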

######df.loc----useful for label/value-based searching (as opposed to iloc's integer positions)############################
print(df.loc[df['Type 1'] == "Fire"])

############sorting#####################################################################################
#pass in the column you want to sort by
print(df.sort_values('Name'))

#sort in reverse (descending) order
print(df.sort_values('Name', ascending=False))

#sort by a main and a secondary column
#here the first sorts ascending and the second descending
print(df.sort_values(['Type 1', 'HP'], ascending=[True, False]))



42 changes: 42 additions & 0 deletions 2_make_changes.py
@@ -0,0 +1,42 @@
import pandas as pd

df = pd.read_csv('pokemon_data.csv')

#IN THESE EXAMPLES WE WILL ESSENTIALLY REARRANGE DATA. WE ARE GOING TO USE A LOT OF INDEXING
#BE CAREFUL---IF YOUR DATA CHANGES, HARDCODED INDEXING BREAKS-----USE COLUMN NAMES INSTEAD, LIKE IN THE FIRST EXAMPLE


#create a new column that totals all the stats
#this way is easiest to read-----and SAFEST
df['Total'] = df['HP'] + df['Attack'] + df['Defense'] + df['Sp. Atk'] + df['Sp. Def'] + df['Speed']
print(df.head(5))

#drop the column we made:
df = df.drop(columns=['Total'])
print(df.head(5))

# add the column back in a more succinct way using iloc (integer location)
# : means all rows
# 4:10 selects the columns at integer positions 4 through 9 (HP through Speed)
# axis=1 sums across each row; axis=0 would sum down each column
df['Total'] = df.iloc[:, 4:10].sum(axis=1)
print(df.head(5))
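
# A name-based sketch of the same total (an alternative to the positional slice
# above; assumes the stat columns keep these exact names). It keeps working even
# if the column order changes:
# stat_cols = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']
# df['Total'] = df[stat_cols].sum(axis=1)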


# let's rearrange the columns to move Total over to the left of HP
#first get the columns as a list
#then concatenate the slices---note cols[-1] is a single string, so wrap it as [cols[-1]] to make it a list
cols = list(df.columns.values)
df = df[cols[0:4] + [cols[-1]] + cols[4:12]]
print(df.head(5))
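
# A name-based sketch of the same rearrangement (an alternative, not in the
# original): remove 'Total' from the list and re-insert it just before 'HP',
# so nothing depends on hardcoded slice positions.
# cols = list(df.columns)
# cols.remove('Total')
# cols.insert(cols.index('HP'), 'Total')
# df = df[cols]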


#########################LET'S SAVE OUR DATA################################
#by setting index=False, we drop the leftmost index column
df.to_csv('modified_pokemon.csv', index=False)

#to save as excel
df.to_excel('modified_poke.xlsx', index=False)

#to save as a tab-separated file
df.to_csv('modified_poke2.txt', index=False, sep='\t')
74 changes: 74 additions & 0 deletions 3_filerting_data.py
@@ -0,0 +1,74 @@
import pandas as pd
import re

df = pd.read_csv('pokemon_data.csv')

#filter where a column equals a value
print(df.loc[df['Type 1'] == 'Grass'])

#multiple text conditions
# & means both conditions must hold (and)
# | means at least one condition must hold (or)
print(df.loc[(df['Type 1'] == 'Grass') & (df['Type 2'] == 'Poison')])

#can also filter on numeric values, not just text matches
print(df.loc[(df['Type 1'] == 'Grass') & (df['Type 2'] == 'Poison') & (df['HP'] > 70)])


#can also create new data frames and save them#################################################################
new_df = df.loc[(df['Type 1'] == 'Grass') & (df['Type 2'] == 'Poison') & (df['HP'] > 70)]

# WHEN YOU FILTER DATA IT KEEPS THE OLD INDEX NUMBERS---but you can reset them---
# by default reset_index saves the old index as a new column, but drop=True discards the old indices
print(new_df)
new_df = new_df.reset_index(drop=True)
print(new_df)
#new_df.to_csv('filtered.csv')


# RegEx Filtering###############################################################################
# Example 1: keep only the names that contain 'Mega'
contains_df = df.loc[df['Name'].str.contains('Mega')]
print(contains_df)

# Example 2: keep only the names that do NOT contain 'Mega'
contains_df2 = df.loc[~df['Name'].str.contains('Mega')]
print(contains_df2)

# Example 3: keep rows whose Type 1 is Grass or Fire, using regex
# regex=True means to treat the pattern as a regular expression
# flags=re.I means to ignore case
# 'fire|grass' means fire or grass

regex_df = df.loc[df['Type 1'].str.contains('fire|grass', flags=re.I, regex=True)]
print(regex_df)

#Example 4: all Pokemon names that start with Pi
#'^pi[a-z]*'
# ^ means the start of the string
# [a-z] matches any lowercase letter
# * means zero or more of the preceding characters (i.e. the rest of the name can be any length)
regex_df2 = df.loc[df['Name'].str.contains('^pi[a-z]*', flags=re.I, regex=True)]
print(regex_df2)
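
# A plain-string sketch of the same "starts with Pi" filter (an alternative to
# the regex above): lower-case the names and use str.startswith.
# print(df.loc[df['Name'].str.lower().str.startswith('pi')])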


#####################################CONDITIONAL CHANGES#################################################
#Example 1: under Type 1----change every 'Fire' value to 'Flamer'
df.loc[df['Type 1'] == 'Fire', 'Type 1'] = 'Flamer'
print(df)
#change it back
df.loc[df['Type 1'] == 'Flamer', 'Type 1'] = 'Fire'
print(df)

#Example 2: change all Fire Pokemon to Legendary----use a condition on one column to change another column's value
df.loc[df['Type 1']== 'Fire', 'Legendary'] = True
print(df.loc[df['Type 1'] == "Fire"])


####reload the dataframe to undo marking all the Fire types as Legendary (modified_pokemon.csv also has the Total column used in Example 3)
df = pd.read_csv('modified_pokemon.csv')
print(df)

#Example 3: Change two columns based on a condition by using lists
df.loc[df['Total'] > 500, ['Generation', 'Legendary']] = [88, 'Yeah']
print(df)
3 changes: 3 additions & 0 deletions 4_aggregate_stats.py
@@ -0,0 +1,3 @@
import pandas as pd

df = pd.read_csv('modified_pokemon.csv')
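
# The file stops after loading the data. A minimal sketch of where aggregate
# stats usually go next (the groupby below is an assumption, not part of this
# commit): average stats per Type 1, sorted by Attack.
# print(df.groupby('Type 1').mean(numeric_only=True).sort_values('Attack', ascending=False))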
6 changes: 0 additions & 6 deletions analysis.py

This file was deleted.

6 changes: 6 additions & 0 deletions filtered.csv
@@ -0,0 +1,6 @@
,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
1,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
2,45,Vileplume,Grass,Poison,75,80,85,110,90,50,1,False
3,71,Victreebel,Grass,Poison,80,105,65,100,70,70,1,False
4,591,Amoonguss,Grass,Poison,114,85,70,85,80,30,5,False
Binary file added modified_poke.xlsx
Binary file not shown.