# Toy SQL-injection detector: trains a bag-of-words classifier and saves it to model.pkl.
|
import joblib
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline

# Tiny hand-labeled toy corpus: label 1 = SQL-injection-like input, 0 = benign text.
# Far too small for a real detector — this is a demo/smoke-test dataset only.
DATA = [
    ("' OR '1'='1", 1),
    ("SELECT * FROM users WHERE id=1", 1),
    ("DROP TABLE users;", 1),
    ("username=admin'--", 1),
    ("hello world", 0),
    ("this is a normal query", 0),
    ("select data from table", 0),
    ("just another harmless input", 0),
]


def main() -> None:
    """Train a bag-of-words SQL-injection classifier and persist it to model.pkl.

    Splits DATA into train/test sets, fits a CountVectorizer +
    LogisticRegression pipeline, reports held-out accuracy, and saves
    the fitted pipeline with joblib.
    """
    queries, labels = zip(*DATA)

    # stratify=labels guarantees both classes appear in the 2-sample
    # test split — without it, a tiny 8-row dataset can easily end up
    # with a single-class test set.
    X_train, X_test, y_train, y_test = train_test_split(
        queries, labels, test_size=0.2, random_state=42, stratify=labels
    )

    # Pipeline couples vectorizer and classifier so the saved artifact
    # accepts raw strings directly at prediction time.
    pipeline = make_pipeline(CountVectorizer(), LogisticRegression())
    pipeline.fit(X_train, y_train)

    # Evaluate before saving — the original discarded X_test/y_test and
    # persisted the model with no quality signal at all.
    accuracy = pipeline.score(X_test, y_test)
    print(f"Held-out accuracy: {accuracy:.2f}")

    joblib.dump(pipeline, "model.pkl")
    print("Model trained and saved to model.pkl")


if __name__ == "__main__":
    # Guard keeps importing this module side-effect free; training only
    # runs when the file is executed as a script.
    main()
|