我正在编写一个 Python 程序,其中需要做的一件事是从一个大型 ndjson 文件中提取出一段文本。所以我想知道我想做的事情是否可以实现。下面是该大型 ndjson 文件的一段摘录(抱歉内容较长,这里只包含两个条目)。我需要的部分已用 "<<" 标出。
{ "took" : 27, "timed_out" : false, "_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0 }, "hits" : {
"total" : {
"value" : 2008,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
================================FIRST ENTRY============================================
{
"_index" : "kibana-detections",
"_type" : "_doc",
"_id" : "alert:4ad48c3e-fc66-11ec-a8b4-5df1d1275a3c",
"_score" : 1.0,
"_source" : {
"alert" : {
"name" : "Modification or Removal of an Okta Application Sign-On Policy",
"tags" : [
"Elastic",
"Identity",
"Okta",
"Continuous Monitoring",
"SecOps",
"Identity and Access",
"__internal_rule_id:cd16fb10-0261-46e8-9932-a0336278cdbe",
"__internal_immutable:true"
],
"alertTypeId" : "siem.signals",
"consumer" : "siem",
"params" : { << What I need - From Here ( '{' onwards)
"author" : [
"Elastic"
],
"description" : "Detects attempts to modify or delete a sign on policy for an Okta application. An adversary may attempt to modify or delete the sign on policy for an Okta application in order to remove or weaken an organization's security controls.",
"ruleId" : "cd16fb10-0261-46e8-9932-a0336278cdbe",
"falsePositives" : [
"Consider adding exceptions to this rule to filter false positives if sign on policies for Okta applications are regularly modified or deleted in your organization."
],
"from" : "now-6m",
"immutable" : true,
"license" : "Elastic License v2",
"outputIndex" : ".siem-signals-default",
"maxSignals" : 100,
"riskScore" : 47,
"riskScoreMapping" : [ ],
"severity" : "medium",
"severityMapping" : [ ],
"threat" : [ ],
"timestampOverride" : "event.ingested",
"to" : "now",
"references" : [
"https://help.okta.com/en/prod/Content/Topics/Security/App_Based_Signon.htm",
"https://developer.okta.com/docs/reference/api/system-log/",
"https://developer.okta.com/docs/reference/api/event-types/"
],
"note" : "## Config\n\nThe Okta Fleet integration, Filebeat module, or similarly structured data is required to be compatible with this rule.",
"version" : 6,
"exceptionsList" : [ ],
"index" : [
"filebeat-*",
"logs-okta*"
],
"query" : "event.dataset:okta.system and event.action:(application.policy.sign_on.update or application.policy.sign_on.rule.delete)\n",
"language" : "kuery",
"type" : "query"
}, << To here
"schedule" : {
"interval" : "5m"
},
"enabled" : false,
"actions" : [ ],
"throttle" : null,
"notifyWhen" : "onActiveAlert",
"apiKeyOwner" : null,
"apiKey" : null,
"legacyId" : "4ad48c3e-fc66-11ec-a8b4-5df1d1275a3c",
"createdBy" : "elastic",
"updatedBy" : "elastic",
"createdAt" : "2022-07-05T13:28:02.747Z",
"updatedAt" : "2022-07-05T13:28:02.747Z",
"muteAll" : false,
"mutedInstanceIds" : [ ],
"executionStatus" : {
"status" : "pending",
"lastExecutionDate" : "2022-07-05T13:28:02.747Z",
"error" : null
},
"meta" : {
"versionApiKeyLastmodified" : "7.17.3"
}
},
"type" : "alert",
"references" : [ ],
"migrationVersion" : {
"alert" : "7.16.0"
},
"coreMigrationVersion" : "7.17.3",
"updated_at" : "2022-07-05T13:28:02.747Z"
}
},
======================================SECOND ENTRY=========================================
{
"_index" : "kibana-detections",
"_type" : "_doc",
"_id" : "alert:4ad883de-fc66-11ec-a8b4-5df1d1275a3c",
"_score" : 1.0,
"_ignored" : [
"alert.params.query.keyword",
"alert.params.description.keyword",
"alert.params.note.keyword"
],
"_source" : {
"alert" : {
"name" : "AdminSDHolder SDProp Exclusion Added",
"tags" : [
"Elastic",
"Host",
"Windows",
"Threat Detection",
"Persistence",
"Active Directory",
"__internal_rule_id:61d29caf-6c15-4d1e-9ccb-7ad12ccc0bc7",
"__internal_immutable:true"
],
"alertTypeId" : "siem.signals",
"consumer" : "siem",
"params" : { << What I need - From here ('{' onwards)
"author" : [
"Elastic"
],
"description" : "Identifies a modification on the dsHeuristics attribute on the bit that holds the configuration of groups excluded from the SDProp process. The SDProp compares the permissions on protected objects with those defined on the AdminSDHolder object. If the permissions on any of the protected accounts and groups do not match, the permissions on the protected accounts and groups are reset to match those of the domain's AdminSDHolder object, meaning that groups excluded will remain unchanged. Attackers can abuse this misconfiguration to maintain long-term access to privileged accounts in these groups.",
"ruleId" : "61d29caf-6c15-4d1e-9ccb-7ad12ccc0bc7",
"falsePositives" : [ ],
"from" : "now-9m",
"immutable" : true,
"license" : "Elastic License v2",
"outputIndex" : ".siem-signals-default",
"maxSignals" : 100,
"riskScore" : 73,
"riskScoreMapping" : [ ],
"severity" : "high",
"severityMapping" : [ ],
"threat" : [
{
"framework" : "MITRE ATT&CK",
"tactic" : {
"id" : "TA0003",
"name" : "Persistence",
"reference" : "https://attack.mitre.org/tactics/TA0003/"
},
"technique" : [ ]
}
],
"timestampOverride" : "event.ingested",
"to" : "now",
"references" : [
"https://www.cert.ssi.gouv.fr/uploads/guide-ad.html#dsheuristics_bad",
"https://petri.com/active-directory-security-understanding-adminsdholder-object"
],
"note" : "## Triage and analysis\n\n### .\n",
"version" : 2,
"exceptionsList" : [ ],
"index" : [
"winlogbeat-*",
"logs-system.*"
],
"query" : "any where event.action == \"Directory Service Changes\" and\n event.code == \"5136\" and\n winlog.event_data.AttributeLDAPDisplayName : \"dSHeuristics\" and\n length(winlog.event_data.AttributeValue) > 15 and\n winlog.event_data.AttributeValue regex~ \"[0-9]{15}([1-9a-f]).*\"\n",
"language" : "eql",
"type" : "eql"
}, << To here
"schedule" : {
"interval" : "5m"
},
"enabled" : false,
"actions" : [ ],
"throttle" : null,
"notifyWhen" : "onActiveAlert",
"apiKeyOwner" : null,
"apiKey" : null,
"legacyId" : "4ad883de-fc66-11ec-a8b4-5df1d1275a3c",
"createdBy" : "elastic",
"updatedBy" : "elastic",
"createdAt" : "2022-07-05T13:28:05.532Z",
"updatedAt" : "2022-07-05T13:28:05.532Z",
"muteAll" : false,
"mutedInstanceIds" : [ ],
"executionStatus" : {
"status" : "pending",
"lastExecutionDate" : "2022-07-05T13:28:05.532Z",
"error" : null
},
"meta" : {
"versionApiKeyLastmodified" : "7.17.3"
}
},
"type" : "alert",
"references" : [ ],
"migrationVersion" : {
"alert" : "7.16.0"
},
"coreMigrationVersion" : "7.17.3",
"updated_at" : "2022-07-05T13:28:05.532Z"
}
}, ....简而言之(TL;DR),对于整个文件中的每一个条目,我只需要保留
{
author:....
....
....
type: ..
}这一部分,其余内容全部去掉。
所以问题是:这能否用 Pythonic 的方式和/或正则表达式(regex)来实现?还是说这样做太复杂了?
发布于 2022-08-17 06:18:22
您可以使用 Python 标准库中的 json 模块(https://docs.python.org/3/library/json.html)将 ndjson 文件读入 Python 字典。
借助它,您只需编写少量代码,就可以提取所需的数据(例如每个条目中 "_source" → "alert" → "params" 下的对象),并将其写回文本文件。
希望这能帮到您。
https://stackoverflow.com/questions/73383625
复制相似问题