# 一、零碎知识点

## 1、pandas基础

参考 pandas 官方入门教程《10 minutes to pandas》。

## 2、MovieLens电影数据分析实例

### ①导入并合并数据：

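注：原笔记省略了读取三个数据文件的步骤（`users`、`ratings`、`movies` 未定义就被合并），下面按 MovieLens 1M 数据集以 `::` 分隔的格式补全；文件路径仅为示例，请按实际存放位置调整。若读取 movies.dat 时出现编码错误，可尝试加上 `encoding='latin-1'`。

```python
import pandas as pd

unames = ['user_id','gender','age','occupation','zip']
users = pd.read_table('ml-1m/users.dat', sep='::', header=None, names=unames, engine='python')

rating_names = ['user_id','movie_id','rating','timestamp']
ratings = pd.read_table('ml-1m/ratings.dat', sep='::', header=None, names=rating_names, engine='python')

movie_names = ['movie_id','title','genres']
movies = pd.read_table('ml-1m/movies.dat', sep='::', header=None, names=movie_names, engine='python')

data = pd.merge(pd.merge(users,ratings),movies)
```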


### ②查看数据

```python
print(len(data))
data.head(10)
```


输出（总行数及前 10 行）：

```text
1000209
user_id	gender	age	occupation	zip	movie_id	rating	timestamp	title	genres
0	1	F	1	10	48067	1193	5	978300760	One Flew Over the Cuckoo's Nest (1975)	Drama
1	2	M	56	16	70072	1193	5	978298413	One Flew Over the Cuckoo's Nest (1975)	Drama
2	12	M	25	12	32793	1193	4	978220179	One Flew Over the Cuckoo's Nest (1975)	Drama
3	15	M	25	7	22903	1193	4	978199279	One Flew Over the Cuckoo's Nest (1975)	Drama
4	17	M	50	1	95350	1193	5	978158471	One Flew Over the Cuckoo's Nest (1975)	Drama
5	18	F	18	3	95825	1193	4	978156168	One Flew Over the Cuckoo's Nest (1975)	Drama
6	19	M	1	10	48073	1193	5	982730936	One Flew Over the Cuckoo's Nest (1975)	Drama
7	24	F	25	7	10023	1193	5	978136709	One Flew Over the Cuckoo's Nest (1975)	Drama
8	28	F	25	1	14607	1193	3	978125194	One Flew Over the Cuckoo's Nest (1975)	Drama
9	33	M	45	3	55421	1193	5	978557765	One Flew Over the Cuckoo's Nest (1975)	Drama
```


### ③按性别统计，创建数据透视表

```python
ratings_by_gender = data.pivot_table(values='rating',index='title',columns='gender',aggfunc='mean')
ratings_by_gender.head(10)
```


输出（各电影按性别统计的平均评分，前 10 行）：

```text
gender	F	M
title
$1,000,000 Duck (1971)	3.375000	2.761905
'Night Mother (1986)	3.388889	3.352941
'Til There Was You (1997)	2.675676	2.733333
'burbs, The (1989)	2.793478	2.962085
...And Justice for All (1979)	3.828571	3.689024
1-900 (1994)	2.000000	3.000000
10 Things I Hate About You (1999)	3.646552	3.311966
101 Dalmatians (1961)	3.791444	3.500000
101 Dalmatians (1996)	3.240000	2.911215
12 Angry Men (1957)	4.184397	4.328421
```


### ④按评论数排序

```python
ratings_by_title = data.groupby('title').size()
ratings_by_title.sort_values(ascending=False).head(10)
```


输出（评论数最多的 10 部电影，按评论数降序）：

```text
title
American Beauty (1999)                                   3428
Star Wars: Episode IV - A New Hope (1977)                2991
Star Wars: Episode V - The Empire Strikes Back (1980)    2990
Star Wars: Episode VI - Return of the Jedi (1983)        2883
Jurassic Park (1993)                                     2672
Saving Private Ryan (1998)                               2653
Terminator 2: Judgment Day (1991)                        2649
Matrix, The (1999)                                       2590
Back to the Future (1985)                                2583
Silence of the Lambs, The (1991)                         2578
dtype: int64
```


