# Data Analytics




import nltk

from nltk.corpus import stopwords

from nltk.tokenize import word_tokenize, sent_tokenize

# Frequency-based extractive summary.
#
# Each sentence is scored by summing the corpus frequency of the distinct
# non-stop-words it contains; sentences scoring more than 1.2x the (truncated)
# average score are concatenated into the summary and printed.
#
# NOTE(review): the NLTK tokenizers and stop-word list presumably need the
# "punkt" and "stopwords" resources to be installed beforehand -- confirm.
text = """ keep working. Keep striving. Never give up. Fall down seven times, get

up eight. Ease is a greater threat to progress than hardship. Ease is a greater

threat to progress than

hardship. So, keep moving, keep growing, keep learning. See you at work"""

stopWords = set(stopwords.words("english"))

words = word_tokenize(text)

# Count how often each meaningful (lower-cased) word occurs in the text.
freqTable = {}
for word in words:
    word = word.lower()
    if word in stopWords:
        continue
    # Pure punctuation tokens such as "." or "," are not words; counting them
    # would add the same noise to almost every sentence score.
    if not any(ch.isalnum() for ch in word):
        continue
    freqTable[word] = freqTable.get(word, 0) + 1

sentences = sent_tokenize(text)

# Score each sentence. Matching is done on whole tokens rather than with a
# substring test, so a word is not "found" inside a longer word. The score is
# assigned (not accumulated) so a sentence repeated verbatim is not counted
# twice under the same key. Sentences with no scored word get no entry.
sentenceValue = {}
for sentence in sentences:
    sentenceTokens = {token.lower() for token in word_tokenize(sentence)}
    score = sum(freqTable[token] for token in sentenceTokens if token in freqTable)
    if score:
        sentenceValue[sentence] = score

sumValues = sum(sentenceValue.values())

# Guard against text in which no sentence received a score.
average = int(sumValues / len(sentenceValue)) if sentenceValue else 0

# Keep sentences well above average, in their original order; each kept
# sentence is prefixed with a single space.
summary = "".join(
    " " + sentence
    for sentence in sentences
    if sentence in sentenceValue and sentenceValue[sentence] > (1.2 * average)
)

print(summary)





# SLIP 2

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Fit a linear regression on the data in Salary.csv, print the predictions
# for the held-out third of the rows, and plot those rows against the fitted
# line.
dataset = pd.read_csv("Salary.csv")

# Features: every column except the last. Target: the column at index 1.
# NOTE(review): this presumably expects a two-column file, where column 1 is
# also the last column; with more columns the target would also appear in X
# -- confirm against the CSV layout.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 1].values

# Hold out one third of the rows; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=1/3, random_state=0
)

regressor = LinearRegression()
regressor.fit(X_train, y_train)

y_pred = regressor.predict(X_test)
print(y_pred)

# Plot the held-out observations against the line fitted on the training set.
# The test features must be paired with the test targets: X_test and y_train
# come from different partitions and generally differ in length.
plt.scatter(X_test, y_test, color='red')
plt.plot(X_train, regressor.predict(X_train), color='green')
plt.title("Salary vs Purchases")
plt.xlabel('Purchases')
plt.ylabel('Salary')
plt.show()

# Post a Comment

# 0 Comments