def get_recent_files(dir_path, after_date):
    """Embed and index the lines of recently modified Markdown files.

    Walks ``dir_path`` recursively. For every file whose name ends in
    ``.md`` (case-insensitive) and whose modification time is later than
    ``after_date``, the file is read as UTF-8, every match of the
    module-level ``regex`` pattern is removed, and each remaining non-empty
    line longer than 20 characters is sent to ``openai.Embedding.create``
    and upserted into the module-level ``index`` under a fresh UUID.

    Args:
        dir_path: Root directory to scan.
        after_date: Cut-off date as a ``'YYYY-MM-DD'`` string. It is parsed
            to a naive datetime at midnight and compared against the naive
            local time returned by ``datetime.fromtimestamp``.

    Returns:
        None. Progress is reported with ``print``.

    Raises:
        ValueError: If ``after_date`` does not match ``'%Y-%m-%d'``.

    NOTE(review): ``regex``, ``openai``, ``cs`` and ``index`` are defined
    outside this function; ``index`` presumably is a vector-store handle --
    confirm against the rest of the file.
    """
    cutoff = datetime.strptime(after_date, '%Y-%m-%d')

    for root, _dirs, files in os.walk(dir_path):
        for filename in files:
            # Test the real extension: a substring check would also accept
            # editor backups / swap files such as "notes.md.bak".
            if not filename.lower().endswith(".md"):
                continue

            file_path = os.path.join(root, filename)
            # Only stat files that passed the extension filter.
            mod_time = datetime.fromtimestamp(os.path.getmtime(file_path))
            if mod_time <= cutoff:
                continue

            # Context manager guarantees the handle is closed, even on error.
            with open(file_path, encoding="utf8") as handle:
                text = handle.read()

            # count/flags passed positionally are deprecated (Python 3.13+).
            text = re.sub(regex, '', text, flags=re.DOTALL)
            # One chunk per non-empty line.
            text_chunks = [line for line in text.splitlines() if line]

            print("\n")
            print("Processing " + filename + " ...")
            print("\n")

            for chunk in text_chunks:
                # Lines of 20 characters or fewer are not indexed.
                if len(chunk) <= 20:
                    continue

                vector_id = str(uuid.uuid4())
                print("Creating and indexing embed with id " + vector_id)
                res = openai.Embedding.create(input=chunk, engine=cs.embedding_model)
                embeds = [record['embedding'] for record in res['data']]
                meta = [{'text': chunk, 'filename': filename}]
                # (id, embedding, metadata) tuples, as built by the zip.
                index.upsert(vectors=list(zip([vector_id], embeds, meta)))
# Run the indexing pass with the directory and cut-off date read from `cs`.
get_recent_files(dir_path=cs.directory, after_date=cs.date_modified)