# Toy SQL-injection detector: trains a bag-of-words classifier and saves it to model.pkl.
|
import joblib
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline

# Tiny hand-labeled toy corpus: label 1 = SQL-injection-like input, 0 = benign text.
# Far too small for a real detector — this is a demo/smoke-test dataset only.
DATA = [
    ("' OR '1'='1", 1),
    ("SELECT * FROM users WHERE id=1", 1),
    ("DROP TABLE users;", 1),
    ("username=admin'--", 1),
    ("hello world", 0),
    ("this is a normal query", 0),
    ("select data from table", 0),
    ("just another harmless input", 0),
]


def main() -> None:
    """Train a bag-of-words SQL-injection classifier and persist it to model.pkl.

    Splits DATA into train/test sets, fits a CountVectorizer +
    LogisticRegression pipeline, reports held-out accuracy, and saves
    the fitted pipeline with joblib.
    """
    queries, labels = zip(*DATA)

    # stratify=labels guarantees both classes appear in the 2-sample
    # test split — without it, a tiny 8-row dataset can easily end up
    # with a single-class test set.
    X_train, X_test, y_train, y_test = train_test_split(
        queries, labels, test_size=0.2, random_state=42, stratify=labels
    )

    # Pipeline couples vectorizer and classifier so the saved artifact
    # accepts raw strings directly at prediction time.
    pipeline = make_pipeline(CountVectorizer(), LogisticRegression())
    pipeline.fit(X_train, y_train)

    # Evaluate before saving — the original discarded X_test/y_test and
    # persisted the model with no quality signal at all.
    accuracy = pipeline.score(X_test, y_test)
    print(f"Held-out accuracy: {accuracy:.2f}")

    joblib.dump(pipeline, "model.pkl")
    print("Model trained and saved to model.pkl")


if __name__ == "__main__":
    # Guard keeps importing this module side-effect free; training only
    # runs when the file is executed as a script.
    main()
|