Python/TA
Python: PyTorch, 질의응답 관련, BERT 사용
sucun
2020. 8. 13. 18:29
In [4]:
## BertForMaskedLM
# Run the masked-language-model head on one sentence, passing the token ids
# as their own labels, then show the first two outputs (unpacked here as the
# loss and the prediction scores).
import torch
from transformers import BertForMaskedLM, BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForMaskedLM.from_pretrained('bert-base-uncased')

# Only the token-id tensor is fed to the model in this cell.
encoded = tokenizer("Hello, my dog is cute", return_tensors="pt")
input_ids = encoded["input_ids"]

outputs = model(input_ids, labels=input_ids)
loss = outputs[0]
prediction_scores = outputs[1]

for value in (loss, prediction_scores):
    print(type(value), value)
In [8]:
## BertForNextSentencePrediction
# Ask the next-sentence-prediction head whether `next_sentence` plausibly
# follows `prompt`, then print the loss, the logits and the encoded inputs.
from transformers import BertTokenizer, BertForNextSentencePrediction
import torch
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')
prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
next_sentence = "The sky is blue due to the shorter wavelength of blue light."
# Passing two strings encodes them together as a single sentence pair.
encoding = tokenizer(prompt, next_sentence, return_tensors='pt')
# Label 1 marks `next_sentence` as a random (non-consecutive) sentence.
outputs = model(**encoding, next_sentence_label=torch.LongTensor([1]))
# Slice the outputs instead of unpacking the return value directly: this works
# for plain tuples and for the dict-like output objects of newer transformers
# releases (whose iteration yields key names, not tensors), and it matches how
# the other cells in this notebook read their outputs.
loss, logits = outputs[:2]
assert logits[0, 0] < logits[0, 1] # next sentence was random
print('loss',type(loss),loss)
print('logits',type(logits),logits)
print('encoding',type(encoding),encoding)
In [10]:
## BertForSequenceClassification
# Classify one sentence with the sequence-classification head and print the
# first two outputs (unpacked here as the loss and the logits).
import torch
from transformers import BertForSequenceClassification, BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
# A single label wrapped in a batch dimension: shape (1, 1), batch size 1.
labels = torch.tensor([[1]])

outputs = model(labels=labels, **inputs)
loss, logits = outputs[0], outputs[1]

print('loss', type(loss), loss)
print('logits', type(logits), logits)
In [18]:
## BertForMultipleChoice
# Score candidate continuations of `prompt` with the multiple-choice head and
# print the loss and the per-choice logits.
from transformers import BertTokenizer, BertForMultipleChoice
import torch
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForMultipleChoice.from_pretrained('bert-base-uncased')
prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
choice0 = "It is eaten with a fork and a knife."
choice1 = "It is eaten while held in the hand."
# choice2 / choice3 are defined for experimentation but are not fed to the model below.
choice2 = "It is eaten while held in the handle."
choice3 = "It is eaten while held in the way."
labels = torch.tensor(0).unsqueeze(0) # choice0 is correct (according to Wikipedia ;)), batch size 1
# Pass the first and second segments as two parallel lists so that each row is
# encoded as (prompt, choice_i). A single nested list
# [[prompt, prompt], [choice0, choice1]] would instead be read as the pairs
# (prompt, prompt) and (choice0, choice1), which is not what the model should score.
encoding = tokenizer([prompt, prompt], [choice0, choice1], return_tensors='pt', padding=True)
# Add a leading batch dimension to every encoded tensor: (num_choices, seq) -> (1, num_choices, seq).
outputs = model(**{k: v.unsqueeze(0) for k,v in encoding.items()}, labels=labels) # batch size is 1
# the linear classifier still needs to be trained
loss, logits = outputs[:2]
print('loss',type(loss),loss)
print('logits',type(logits),logits)
In [20]:
## BertForTokenClassification
# Run the token-classification head on one sentence, labelling every token
# with class 1, and print the loss and the per-token scores.
from transformers import BertTokenizer, BertForTokenClassification
import torch
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForTokenClassification.from_pretrained('bert-base-uncased')
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
# One label per input token (sequence length taken from input_ids), plus a batch dimension.
labels = torch.tensor([1] * inputs["input_ids"].size(1)).unsqueeze(0) # Batch size 1
outputs = model(**inputs, labels=labels)
loss, scores = outputs[:2]
print('loss',type(loss),loss)
# Print this cell's own `scores`; the previous code printed `logits`, a
# variable left over from an earlier cell (NameError on a fresh kernel).
print('logits',type(scores),scores)
In [23]:
## BertForQuestionAnswering
# Run the question-answering head on one sentence with fixed start/end target
# positions, then print the loss, the start scores and the raw outputs.
import torch
from transformers import BertForQuestionAnswering, BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
# Target span boundaries, given as token positions (one entry per batch item).
start_positions, end_positions = torch.tensor([1]), torch.tensor([3])

outputs = model(
    start_positions=start_positions,
    end_positions=end_positions,
    **inputs,
)
loss = outputs[0]
start_scores = outputs[1]
end_scores = outputs[2]  # unpacked for completeness; not printed below

print('loss', type(loss), loss)
print('start_scores', type(start_scores), start_scores)
print('outputs', type(outputs), outputs)
Out[23]:
In [25]:
# Sentiment analysis through the high-level pipeline API.
# The __future__ import has to remain the first statement of the cell.
from __future__ import print_function
from transformers import pipeline

# No model is named, so the pipeline falls back to the library's default for this task.
nlp_sentence_classif = pipeline(task='sentiment-analysis')

# Final expression of the cell: its value is what the notebook echoes.
nlp_sentence_classif('Such a nice weather outside !')
Out[25]:
In [26]:
# Named-entity recognition through the pipeline API; relies on `pipeline`
# having been imported by an earlier cell.
nlp_token_class = pipeline(task='ner')

# Final expression of the cell: its value is what the notebook echoes.
nlp_token_class('Hugging Face is a French company based in New-York.')
Out[26]:
In [29]:
# Extractive question answering through the pipeline API; relies on `pipeline`
# having been imported by an earlier cell.
nlp_qa = pipeline('question-answering')
# A notebook only echoes the final expression of a cell, so the first answer
# is printed explicitly; previously its result was silently discarded.
print(nlp_qa(context="Hello, my dog is cute", question='how about my dog?'))
nlp_qa(context="Hello, my cat is baby", question='who is my baby?')
Out[29]:
In [39]:
# Try the question-answering pipeline (`nlp_qa`, created in an earlier cell)
# on Korean text. A notebook only echoes the final expression of a cell, so
# the first answer is printed explicitly; previously it was silently discarded.
print(nlp_qa(context="this weather, 오늘은 날씨가 좋습니다.", question='오늘 날씨 어때요?'))
nlp_qa(context="어제는 날씨가 안 좋았습니다, 오늘은 날씨가 좋습니다.", question='오늘 날씨 어때요?')
## Still investigating why Korean input does not work.
# NOTE(review): `nlp_qa` was created without an explicit `model=` argument, so
# it uses the library's default checkpoint for this task, presumably an
# English-only model. Confirm, then try a Korean or multilingual QA checkpoint.
In [ ]:
# Load a fresh sequence-classification BERT model from the 'bert-base-uncased'
# checkpoint and put it into training mode as a starting point for fine-tuning.
from transformers import BertForSequenceClassification
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
# Switches the module to training mode; no training step is run in this cell.
model.train()
반응형