import joblib
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split

# small labeled toy dataset: 1 = SQL-injection-style input, 0 = benign text
data = [
    ("' OR '1'='1", 1),
    ("SELECT * FROM users WHERE id=1", 1),
    ("DROP TABLE users;", 1),
    ("username=admin'--", 1),
    ("hello world", 0),
    ("this is a normal query", 0),
    ("select data from table", 0),
    ("just another harmless input", 0),
]
queries, labels = zip(*data)

# split data into training and testing sets; stratify so both classes
# appear in the very small train and test splits
X_train, X_test, y_train, y_test = train_test_split(
    queries, labels, test_size=0.2, random_state=42, stratify=labels
)

# build a pipeline with a bag-of-words vectorizer and a logistic regression model
pipeline = make_pipeline(CountVectorizer(), LogisticRegression())

# train the model
pipeline.fit(X_train, y_train)

# evaluate on the held-out test set
print(f"Test accuracy: {pipeline.score(X_test, y_test):.2f}")

# save the trained pipeline to a file
joblib.dump(pipeline, "model.pkl")
print("Model trained and saved to model.pkl")
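
# --- usage sketch (assumption, not part of the original script) ---
# A minimal example of loading the saved pipeline and classifying new query
# strings. The sample inputs below are illustrative placeholders; adapt them
# to whatever traffic you actually want to screen.
loaded = joblib.load("model.pkl")
samples = ["' OR 1=1 --", "show me the weather"]
for query, label in zip(samples, loaded.predict(samples)):
    print(f"{query!r} -> {'suspicious' if label == 1 else 'benign'}")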