GitHub - NuriaGutierrez/matchString: Comparing string-matching methods in Python
pip install distance numpy pandas matplotlib scikit-learn seaborn python-Levenshtein
database1 = "./D/database_1.csv"
database2 = "./D/database_2.csv"
train_data_file = "./D/train.csv"
test_data_file = "./D/test.csv"
tfidf_matrix_train,dictTrain,tfidf_matrix_trainBigrams,dictTrainBigrams,lenGram = createTFIDF(database1,database2)
clf,clf2 = train(train_data_file,tfidf_matrix_train,dictTrain,tfidf_matrix_trainBigrams,dictTrainBigrams,lenGram,sep="\t")
predict = test(test_data_file,tfidf_matrix_train,dictTrain,tfidf_matrix_trainBigrams,dictTrainBigrams,lenGram,clf,clf2,sep="\t")
plot(predict)
distances = find_distances(st1,st2)
clf.decision_function(np.array(temp,dtype=float))